ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1487943 - in /ctakes/sandbox/ctakes-sectionizer: ./ .settings/ src/ src/main/ src/main/java/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/ctakes/ src/main/java/org/apache/ctakes/core/ src/main/java/org/apache/ctake...
Date Thu, 30 May 2013 17:26:52 GMT
Author: chenpei
Date: Thu May 30 17:26:51 2013
New Revision: 1487943

URL: http://svn.apache.org/r1487943
Log:
CTAKES-200 - Add a sectionizer that will normalize to HL7/CCDA standards.

Added:
    ctakes/sandbox/ctakes-sectionizer/
    ctakes/sandbox/ctakes-sectionizer/.classpath   (with props)
    ctakes/sandbox/ctakes-sectionizer/.project   (with props)
    ctakes/sandbox/ctakes-sectionizer/.settings/
    ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs
    ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs
    ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs
    ctakes/sandbox/ctakes-sectionizer/pom.xml   (with props)
    ctakes/sandbox/ctakes-sectionizer/src/
    ctakes/sandbox/ctakes-sectionizer/src/main/
    ctakes/sandbox/ctakes-sectionizer/src/main/java/
    ctakes/sandbox/ctakes-sectionizer/src/main/java/org/
    ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/
    ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/
    ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/
    ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/
    ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
  (with props)
    ctakes/sandbox/ctakes-sectionizer/src/main/resources/
    ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/
    ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/
    ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/
    ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/
    ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/
    ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
  (with props)
    ctakes/sandbox/ctakes-sectionizer/src/test/
    ctakes/sandbox/ctakes-sectionizer/src/test/java/
    ctakes/sandbox/ctakes-sectionizer/src/test/java/org/
    ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/
    ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/
    ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/
    ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/
    ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
  (with props)
    ctakes/sandbox/ctakes-sectionizer/src/test/resources/

Added: ctakes/sandbox/ctakes-sectionizer/.classpath
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.classpath?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.classpath (added)
+++ ctakes/sandbox/ctakes-sectionizer/.classpath Thu May 30 17:26:51 2013
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" output="target/classes" path="src/main/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>

Propchange: ctakes/sandbox/ctakes-sectionizer/.classpath
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/ctakes-sectionizer/.project
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.project?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.project (added)
+++ ctakes/sandbox/ctakes-sectionizer/.project Thu May 30 17:26:51 2013
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>ctakes-sectionizer</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+	</natures>
+</projectDescription>

Propchange: ctakes/sandbox/ctakes-sectionizer/.project
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs (added)
+++ ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.core.resources.prefs Thu May 30
17:26:51 2013
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+encoding//src/test/java=UTF-8
+encoding//src/test/resources=UTF-8
+encoding/<project>=UTF-8

Added: ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs (added)
+++ ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.jdt.core.prefs Thu May 30 17:26:51
2013
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.6

Added: ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs (added)
+++ ctakes/sandbox/ctakes-sectionizer/.settings/org.eclipse.m2e.core.prefs Thu May 30 17:26:51
2013
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1

Added: ctakes/sandbox/ctakes-sectionizer/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/pom.xml?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/pom.xml (added)
+++ ctakes/sandbox/ctakes-sectionizer/pom.xml Thu May 30 17:26:51 2013
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<parent>
+		<groupId>org.apache.ctakes</groupId>
+		<artifactId>ctakes</artifactId>
+		<version>3.1.0-SNAPSHOT</version>
+	</parent>
+	<artifactId>ctakes-sectionizer</artifactId>
+	<name>ctakes-sectionizer</name>
+	<description>ctakes-sectionizer</description>
+	<dependencies>
+		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-core</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.cleartk</groupId>
+			<artifactId>cleartk-util</artifactId>
+		</dependency>		
+	</dependencies>
+</project>
\ No newline at end of file

Propchange: ctakes/sandbox/ctakes-sectionizer/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
(added)
+++ ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
Thu May 30 17:26:51 2013
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.core.ae;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.descriptor.ConfigurationParameter;
+
+/**
+ * Creates segment annotations based on the ccda_sections.txt file, which is
+ * based on HL7/CCDA/LOINC standard headings. Additional custom heading names
+ * can be added to the file.
+ */
+public class CDASegmentAnnotator extends JCasAnnotator_ImplBase {
+
+	Logger logger = Logger.getLogger(this.getClass());
+	protected static HashMap<String, Pattern> patterns = new HashMap<String, Pattern>();
+	protected static final String DEFAULT_SECTION_FILE_NAME = "org/apache/ctakes/core/sections/ccda_sections.txt";
+	// Field separator: assumes the sections mapping file is comma delimited;
+	// this is unlikely to change.
+	public static final String PARAM_FIELD_SEPERATOR = ",";
+	public static final String PARAM_COMMENT = "#";
+	public static final String PARAM_SECTIONS_FILE = "sections_file";
+
+	@ConfigurationParameter(name = PARAM_SECTIONS_FILE, description = "Path to File that contains
the section header mappings")
+	protected URI sections_path;
+
+	/**
+	 * Init and load the sections mapping file and precompile the regex matches
+	 * into a hashmap
+	 */
+	public void initialize(UimaContext aContext)
+			throws ResourceInitializationException {
+		super.initialize(aContext);
+		String sectionFile = null;
+		try {
+			sectionFile = (String) aContext
+					.getConfigParameterValue(PARAM_SECTIONS_FILE);
+			URL sectionURL = (this.sections_path == null) ? this.getClass()
+					.getClassLoader().getResource(DEFAULT_SECTION_FILE_NAME)
+					.toURI().toURL() : this.sections_path.toURL();
+
+			BufferedReader br = new BufferedReader(new InputStreamReader(
+					sectionURL.openStream()));
+
+			// Read in the Section Mappings File
+			// And load the RegEx Patterns into a Map
+			logger.info("Reading Section File " + sectionURL);
+			String line = null;
+			while ((line = br.readLine()) != null) {
+				if (!line.trim().startsWith(PARAM_COMMENT)) {
+					String[] l = line.split(PARAM_FIELD_SEPERATOR);
+					// First column is the HL7 section template id
+					if (l != null && l.length > 0 && l[0] != null
+							&& l[0].length() > 0
+							&& !line.endsWith(PARAM_FIELD_SEPERATOR)) {
+						String id = l[0].trim();
+						// Make a giant alternation (|) regex group for each HL7 template id
+						Pattern p = buildPattern(l);
+						patterns.put(id, p);
+					} else {
+						logger.info("Warning: Skipped reading sections config row: "
+								+ Arrays.toString(l));
+					}
+				}
+			}
+		} catch (Exception e) {
+			logger.error("Error reading Sections file:" + sectionFile);
+			throw new ResourceInitializationException(e);
+		}
+	}
+
+	/**
+	 * Build a regex pattern from a list of section names. Used only during
+	 * initialization.
+	 */
+	private static Pattern buildPattern(String[] line) {
+		StringBuffer sb = new StringBuffer();
+		for (int i = 1; i < line.length; i++) {
+			sb.append(line[i].trim());
+			if (i != line.length - 1) {
+				sb.append("|");
+			}
+		}
+		Pattern p = Pattern.compile("(" + sb + ")", Pattern.CASE_INSENSITIVE);
+		return p;
+	}
+
+	public void process(JCas jCas) throws AnalysisEngineProcessException {
+		String text = jCas.getDocumentText();
+		if (text == null) {
+			String docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+			logger.info("text is null for docId=" + docId, null);
+		} else {
+			for (String id : patterns.keySet()) {
+				Pattern p = patterns.get(id);
+				// System.out.println("Pattern" + p);
+				Matcher m = p.matcher(text);
+				while (m.find()) {
+					Segment segment = new Segment(jCas);
+					segment.setBegin(m.start());
+					// TODO: Need to figure out the end of the section
+					segment.setEnd(m.end());
+					segment.setId(id);
+					segment.addToIndexes();
+				}
+			}
+		}
+	}
+}

Propchange: ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
(added)
+++ ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
Thu May 30 17:26:51 2013
@@ -0,0 +1,34 @@
+# This file is used by ctakes-core/sectionizer
+# It uses rules and RegEx to match the section headers
+# It is derived from the Consolidated CDA/HL7 standard
+# http://bluebuttonplus.org/healthrecords.html
+# http://cdatools.org/infocenter/index.jsp
+# The format is as follows:
+# HL7 template id, LOINC section code, followed by a list of n header names
+# Custom header names can be added to the mapping below
+# By default, they are case insensitive and spaces are trimmed.
+
+2.16.840.1.113883.10.20.22.1.1, 34133-9, Header, Patient information and demographics
+2.16.840.1.113883.10.20.22.2.6.1, 48765-2, Allergies, Adverse Reactions, Alerts
+2.16.840.1.113883.10.20.22.2.22.1, 46240-8, History of encounters, Encounters,Surgeries,
ED visits
+2.16.840.1.113883.10.20.22.2.2.1, 11369-6, History of immunizations, Immunizations,Immunizations
and vaccines
+2.16.840.1.113883.10.20.22.2.1.1, 10160-0, HISTORY OF MEDICATION USE, Medications
+2.16.840.1.113883.10.20.22.2.10, 18776-5, Treatment plan, Care Plan
+2.16.840.1.113883.10.20.22.2.11.1, 10183-2, HOSPITAL DISCHARGE MEDICATIONS, Discharge Medications
+1.3.6.1.4.1.19376.1.5.3.1.3.1, 42349-1, Reason for Referral
+2.16.840.1.113883.10.20.22.2.5.1, 11450-4, PROBLEMS, Problem List, Concerns, complaints,
observations
+2.16.840.1.113883.10.20.22.2.7.1, 47519-4, Procedures,	History of procedures
+2.16.840.1.113883.10.20.22.2.14, 47420-5, FUNCTIONAL STATUS, Functional and Cognitive Status,
impairments
+2.16.840.1.113883.10.20.22.2.3.1, 30954-2, Results, laboratory tests, LABORATORY INFORMATION
+2.16.840.1.113883.10.20.22.2.17, 29762-2, Social History, Observations like smoking, drinking
+2.16.840.1.113883.10.20.22.2.4.1, 8716-3, Vital Signs,height, weight, blood pressure
+2.16.840.1.113883.10.20.22.2.41, 8653-8, HOSPITAL DISCHARGE INSTRUCTIONS, Discharge Instructions,
Written discharge instructions
+
+2.16.840.1.113883.10.20.22.2.15, 10157-6, Family History
+1.3.6.1.4.1.19376.1.5.3.1.1.13.2.1, 10154-3, CHIEF COMPLAINT
+2.16.840.1.113883.10.20.22.2.37,55109-3, Complications
+2.16.840.1.113883.10.20.22.2.20, 11348-0, HISTORY OF PAST ILLNESS
+1.3.6.1.4.1.19376.1.5.3.1.3.4, 10164-2, HISTORY OF PRESENT ILLNESS
+2.16.840.1.113883.10.20.2.5, 10210-3, GENERAL STATUS
+2.16.840.1.113883.10.20.22.2.24, 11535-2, Hospital Discharge Diagnosis
+2.16.840.1.113883.10.20.22.2.16, 11493-4, Hospital Discharge Studies Summary
\ No newline at end of file

Propchange: ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java?rev=1487943&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
(added)
+++ ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
Thu May 30 17:26:51 2013
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.core.ae;
+
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.cleartk.util.cr.FilesCollectionReader;
+import org.junit.Test;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+public class TestCDASegmentAnnotator {
+
+	public static String INPUT_FILE = "../ctakes-regression-test/testdata/input/plaintext/doc2_07543210_sample_current.txt";
+
+	@Test
+	public void TestCDASegmentPipeLine() throws Exception {
+
+		TypeSystemDescription typeSystem = TypeSystemDescriptionFactory
+				.createTypeSystemDescription();
+
+		CollectionReader reader1 = CollectionReaderFactory
+				.createCollectionReader(FilesCollectionReader.class,
+						typeSystem, FilesCollectionReader.PARAM_ROOT_FILE,
+						INPUT_FILE);
+
+		AnalysisEngine sectionAnnotator = AnalysisEngineFactory
+				.createPrimitive(CDASegmentAnnotator.class, typeSystem);
+		AnalysisEngine dumpOutput = AnalysisEngineFactory.createPrimitive(
+				DumpOutputAE.class, typeSystem);
+
+		SimplePipeline.runPipeline(reader1, sectionAnnotator, dumpOutput);
+	}
+
+	public static class DumpOutputAE extends JCasAnnotator_ImplBase {
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
+				System.out.println("Segment:" + segment.getId());
+			}
+		}
+	}
+}

Propchange: ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message