ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1643405 - in /ctakes/branches/sent-detector-newline-fix/ctakes-core: ./ .settings/ resources/launch/ src/main/java/org/apache/ctakes/core/ae/ src/main/java/org/apache/ctakes/core/cr/ src/main/java/org/apache/ctakes/core/sentence/
Date Fri, 05 Dec 2014 19:18:05 GMT
Author: tmill
Date: Fri Dec  5 19:18:05 2014
New Revision: 1643405

URL: http://svn.apache.org/r1643405
Log:
New features for sentence detector on clinical notes.

Added:
    ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SDContextGeneratorCtakes.java
    ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorFactoryCtakes.java
Modified:
    ctakes/branches/sent-detector-newline-fix/ctakes-core/.settings/org.eclipse.core.resources.prefs
    ctakes/branches/sent-detector-newline-fix/ctakes-core/pom.xml
    ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector--train_a_new_model.launch
    ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector_annotator.launch
    ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/Tokenizer_annotator.launch
    ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CPE_GUI--core.launch
    ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CVD--core.launch
    ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java
    ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
    ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
    ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionReader.java
    ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/.settings/org.eclipse.core.resources.prefs
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/.settings/org.eclipse.core.resources.prefs?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/.settings/org.eclipse.core.resources.prefs (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/.settings/org.eclipse.core.resources.prefs Fri Dec  5 19:18:05 2014
@@ -1,5 +1,4 @@
 eclipse.preferences.version=1
 encoding//src/main/java=UTF-8
-encoding//src/main/resources=UTF-8
 encoding//src/test/java=UTF-8
 encoding/<project>=UTF-8

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/pom.xml?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/pom.xml (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/pom.xml Fri Dec  5 19:18:05 2014
@@ -26,7 +26,7 @@
 	<parent>
 		<groupId>org.apache.ctakes</groupId>
 		<artifactId>ctakes</artifactId>
-		<version>3.2.1-SNAPSHOT</version>
+		<version>3.2.2-SNAPSHOT</version>
 	</parent>
 	<dependencies>
 	     <dependency>

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector--train_a_new_model.launch
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector--train_a_new_model.launch?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector--train_a_new_model.launch (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector--train_a_new_model.launch Fri Dec  5 19:18:05 2014
@@ -2,7 +2,7 @@
 <launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
 <stringAttribute key="bad_container_name" value="\core\resources\launch\SentenceDetector - argument checking"/>
 <listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
-<listEntry value="/ctakes-core/src/main/java/org/apache/ctakes/ctakes-core/ae/SentenceDetector.java"/>
+<listEntry value="/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java"/>
 </listAttribute>
 <listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
 <listEntry value="1"/>
@@ -10,7 +10,7 @@
 <booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
 <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.core.ae.SentenceDetector"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="data/test/sample_sd_training_sentences.txt&#13;&#10;resources/sentdetect/sample_sd.mod&#13;&#10;100&#13;&#10;5"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="/home/tmill/mnt/rc-pub/resources/corpora/sentence-training/finished/all_finished.train&#13;&#10;../ctakes-core-res/src/main/resources/org/apache/ctakes/core/sentdetect/sample_sd.mod&#13;&#10;100&#13;&#10;5"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-core"/>
 <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
 </launchConfiguration>

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector_annotator.launch
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector_annotator.launch?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector_annotator.launch (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/SentenceDetector_annotator.launch Fri Dec  5 19:18:05 2014
@@ -11,5 +11,5 @@
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.uima.tools.cpm.CpmFrame"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-core"/>
 <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
-<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xms500M -Xmx3g"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xms500M -Xmx500M"/>
 </launchConfiguration>

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/Tokenizer_annotator.launch
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/Tokenizer_annotator.launch?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/Tokenizer_annotator.launch (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/Tokenizer_annotator.launch Fri Dec  5 19:18:05 2014
@@ -11,5 +11,5 @@
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.uima.tools.cpm.CpmFrame"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-core"/>
 <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
-<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xms500M -Xmx3g"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xms500M -Xmx500M"/>
 </launchConfiguration>

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CPE_GUI--core.launch
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CPE_GUI--core.launch?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CPE_GUI--core.launch (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CPE_GUI--core.launch Fri Dec  5 19:18:05 2014
@@ -24,5 +24,5 @@
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.uima.tools.cpm.CpmFrame"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-core"/>
 <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
-<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xms500M -Xmx3g"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xms500M -Xmx500M"/>
 </launchConfiguration>

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CVD--core.launch
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CVD--core.launch?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CVD--core.launch (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/resources/launch/UIMA_CVD--core.launch Fri Dec  5 19:18:05 2014
@@ -13,5 +13,5 @@
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.uima.tools.annot_view.Gladis"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-core"/>
 <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
-<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="&quot;-Djava.util.logging.config.file=${env_var:UIMA_HOME}/Logger.properties&quot;  -Xms500M -Xmx3g"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="&quot;-Djava.util.logging.config.file=${env_var:UIMA_HOME}/Logger.properties&quot;  -Xms500M -Xmx500M"/>
 </launchConfiguration>

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/DocumentIdPrinterAnalysisEngine.java Fri Dec  5 19:18:05 2014
@@ -36,7 +36,7 @@ public class DocumentIdPrinterAnalysisEn
     String documentId = DocumentIDAnnotationUtil.getDocumentID(jcas);
     String logMessage = String.format("##### current file document id: \"%s\"", documentId);
     logger.info(logMessage);
-    System.out.println(logMessage);
+//    System.out.println(logMessage);
   }
 
 }

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java Fri Dec  5 19:18:05 2014
@@ -41,6 +41,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.refsem.Date;
 import org.apache.ctakes.typesystem.type.refsem.Event;
 import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.refsem.LabDeltaFlag;
 import org.apache.ctakes.typesystem.type.refsem.LabReferenceRange;
 import org.apache.ctakes.typesystem.type.refsem.LabValue;
 import org.apache.ctakes.typesystem.type.refsem.MedicationDosage;
@@ -59,10 +60,14 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.relation.AspectualTextRelation;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.relation.ComplicatesDisruptsTextRelation;
+import org.apache.ctakes.typesystem.type.relation.ContraindicatesTextRelation;
 import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.DiagnosesTextRelation;
+import org.apache.ctakes.typesystem.type.relation.IndicatesTextRelation;
 import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
 import org.apache.ctakes.typesystem.type.relation.ManagesTreatsTextRelation;
 import org.apache.ctakes.typesystem.type.relation.ManifestationOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.PreventsTextRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.relation.ResultOfTextRelation;
 import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
@@ -78,6 +83,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.GenericModifier;
 import org.apache.ctakes.typesystem.type.textsem.HistoryOfModifier;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.LabDeltaFlagModifier;
 import org.apache.ctakes.typesystem.type.textsem.LabEstimatedModifier;
 import org.apache.ctakes.typesystem.type.textsem.LabInterpretationModifier;
 import org.apache.ctakes.typesystem.type.textsem.LabMention;
@@ -154,7 +160,7 @@ public class SHARPKnowtatorXMLReader ext
   /**
    * Get the URI that the text in this class was loaded from
    */
-  protected URI getTextURI(JCas jCas) throws AnalysisEngineProcessException {
+  protected URI getTextURI(JCas jCas) {
     String textPath = JCasUtil.selectSingle(jCas, DocumentID.class).getDocumentID();
     if (this.textDirectory != null) {
       textPath = this.textDirectory + File.separator + textPath;
@@ -176,13 +182,18 @@ public class SHARPKnowtatorXMLReader ext
    * @throws URISyntaxException 
    */
   protected URI getKnowtatorURI(JCas jCas) throws AnalysisEngineProcessException {
-    String textURI = this.getTextURI(jCas).toString();
-    String xmlURI = textURI.replaceAll("Knowtator[/\\\\]text", "Knowtator_XML") + ".knowtator.xml";
-    File fileTest = new File(URI.create(xmlURI));
-    if(!fileTest.exists()){
-      xmlURI = xmlURI.replace("_XML", " XML");
-    }
-    return UriUtils.create(xmlURI);
+    File textURI = new File(this.getTextURI(jCas));
+    String filename = textURI.getName().replace(".txt", "");
+    
+    File xmlPath = new File(textURI.getParentFile().getParentFile().getParentFile().getParentFile().getParentFile(), "by-document/" + filename + "/" + filename + ".umls.knowtator.xml");
+    
+//    String xmlURI = textURI.replaceAll("Knowtator[/\\\\]text", "Knowtator_XML") + ".knowtator.xml";
+//    File fileTest = new File(URI.create(xmlURI));
+//    if(!fileTest.exists()){
+//      xmlURI = xmlURI.replace("_XML", " XML");
+//    }
+//    return URI.create(xmlPath.getAbsolutePath());
+    return UriUtils.create("file:" + xmlPath.getAbsolutePath());
   }
 
   /**
@@ -229,6 +240,7 @@ public class SHARPKnowtatorXMLReader ext
     entityRelationTypes.add("location_of");
     entityRelationTypes.add("manages/treats");
     entityRelationTypes.add("manifestation_of"); // note the misspelling
+    entityRelationTypes.add("prevents");
     entityRelationTypes.add("result_of");
     Set<String> eventRelationTypes = new HashSet<String>();
     eventRelationTypes.add("TLINK");
@@ -274,6 +286,7 @@ public class SHARPKnowtatorXMLReader ext
         AnatomicalSiteMention mention = new AnatomicalSiteMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_ANATOMICAL_SITE,
@@ -281,7 +294,8 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
         KnowtatorAnnotation bodyLaterality = annotationSlots.remove("body_laterality");
         delayedFeatures.add(new DelayedFeature(mention, "bodyLaterality", bodyLaterality));
         KnowtatorAnnotation bodySide = annotationSlots.remove("body_side");
@@ -291,6 +305,7 @@ public class SHARPKnowtatorXMLReader ext
         EventMention mention = new EventMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_CLINICAL_ATTRIBUTE,
@@ -298,12 +313,18 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
 
       } else if ("Devices".equals(annotation.type)) {
+        if(coveringSpan.begin < 0 || coveringSpan.end < 0){
+          LOGGER.error(String.format("Device annotation (id=%s) has invalid span [%d,%d]", annotation.id, coveringSpan.begin, coveringSpan.end)); 
+          continue;
+        }
         EntityMention mention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_DEVICE,
@@ -311,12 +332,14 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
 
       } else if ("Disease_Disorder".equals(annotation.type)) {
         DiseaseDisorderMention mention = new DiseaseDisorderMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_DISORDER,
@@ -324,7 +347,8 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
         KnowtatorAnnotation alleviatingFactor = annotationSlots.remove("alleviating_factor");
         delayedFeatures.add(DelayedRelationFeature.forArg2(
             mention,
@@ -376,6 +400,7 @@ public class SHARPKnowtatorXMLReader ext
         LabMention mention = new LabMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_LAB,
@@ -383,7 +408,8 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
         KnowtatorAnnotation ordinal = annotationSlots.remove("ordinal_interpretation");
         delayedFeatures.add(DelayedRelationFeature.forArg1(
             mention,
@@ -400,11 +426,13 @@ public class SHARPKnowtatorXMLReader ext
             labValue,
             ResultOfTextRelation.class,
             LabValueModifier.class));
-
+        KnowtatorAnnotation deltaFlag = annotationSlots.remove("delta_flag");
+        delayedFeatures.add(new DelayedFeature(mention, "deltaFlag", deltaFlag));
       } else if ("Medications/Drugs".equals(annotation.type)) {
         MedicationMention mention = new MedicationMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_DRUG,
@@ -412,7 +440,8 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
         KnowtatorAnnotation allergy = annotationSlots.remove("allergy_indicator");
         delayedFeatures.add(new DelayedFeature(mention, "medicationAllergy", allergy));
         KnowtatorAnnotation changeStatus = annotationSlots.remove("change_status_model");
@@ -429,6 +458,8 @@ public class SHARPKnowtatorXMLReader ext
         delayedFeatures.add(new DelayedFeature(mention, "medicationRoute", route));
         KnowtatorAnnotation startDate = annotationSlots.remove("start_date");
         delayedFeatures.add(new DelayedFeature(mention, "startDate", startDate));
+        KnowtatorAnnotation endDate = annotationSlots.remove("end_date");
+        delayedFeatures.add(new DelayedFeature(mention, "endDate", endDate));
         KnowtatorAnnotation strength = annotationSlots.remove("strength_model");
         delayedFeatures.add(new DelayedFeature(mention, "medicationStrength", strength));
 
@@ -436,6 +467,7 @@ public class SHARPKnowtatorXMLReader ext
         EventMention mention = new EventMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_PHENOMENA,
@@ -443,12 +475,14 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
 
       } else if ("Procedure".equals(annotation.type)) {
         ProcedureMention mention = new ProcedureMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_PROCEDURE,
@@ -456,7 +490,8 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
         KnowtatorAnnotation bodyLaterality = annotationSlots.remove("body_laterality");
         delayedFeatures.add(new DelayedFeature(mention, "bodyLaterality", bodyLaterality));
         KnowtatorAnnotation bodyLocation = annotationSlots.remove("body_location");
@@ -477,6 +512,7 @@ public class SHARPKnowtatorXMLReader ext
         SignSymptomMention mention = new SignSymptomMention(jCas, coveringSpan.begin, coveringSpan.end);
         addIdentifiedAnnotationFeatures(
             annotation,
+            knowtatorURI,
             mention,
             jCas,
             CONST.NE_TYPE_ID_FINDING,
@@ -484,7 +520,8 @@ public class SHARPKnowtatorXMLReader ext
             booleanSlots,
             annotationSlots,
             idAnnotationMap,
-            delayedFeatures);
+            delayedFeatures,
+            delayedRelations);
         KnowtatorAnnotation alleviatingFactor = annotationSlots.remove("alleviating_factor");
         delayedFeatures.add(DelayedRelationFeature.forArg2(
             mention,
@@ -845,7 +882,16 @@ public class SHARPKnowtatorXMLReader ext
         modifier.setNormalizedForm(attribute);
         modifier.addToIndexes();
         idAnnotationMap.put(annotation.id, modifier);
-
+      } else if ("delta_flag_indicator".equals(annotation.type)) {
+        String value = stringSlots.remove("delta_flag_normalization");
+        LabDeltaFlagModifier modifier = new LabDeltaFlagModifier(jCas, coveringSpan.begin, coveringSpan.end);
+        LabDeltaFlag attribute = new LabDeltaFlag(jCas);
+        attribute.setValue(value);
+        attribute.addToIndexes();
+//        modifier.setValue(value)
+        modifier.setNormalizedForm(attribute);
+        modifier.addToIndexes();
+        idAnnotationMap.put(annotation.id, modifier);
       } else if ("Value".equals(annotation.type)) {
         KnowtatorAnnotation unit = annotationSlots.remove("value_unit");
         KnowtatorAnnotation number = annotationSlots.remove("value_number");
@@ -970,18 +1016,23 @@ public class SHARPKnowtatorXMLReader ext
         KnowtatorAnnotation unit = annotationSlots.remove("strength_unit");
         KnowtatorAnnotation number = annotationSlots.remove("strength_number");
         MedicationStrength attribute = new MedicationStrength(jCas);
+        int spanStart=text.length()-1,spanEnd=0;  // the strength annotation is spanless so we get the modifier span by its components
         if (unit != null) {
           KnowtatorAnnotation.Span unitSpan = unit.getCoveringSpan();
           String unitString = text.substring(unitSpan.begin, unitSpan.end);
           attribute.setUnit(unitString);
+          if(unitSpan.begin < spanStart) spanStart = unitSpan.begin;
+          if(unitSpan.end > spanEnd) spanEnd = unitSpan.end;
         }
         if (number != null) {
           KnowtatorAnnotation.Span numberSpan = number.getCoveringSpan();
           String numberString = text.substring(numberSpan.begin, numberSpan.end);
           attribute.setNumber(numberString);
+          if(numberSpan.begin < spanStart) spanStart = numberSpan.begin;
+          if(numberSpan.end > spanEnd) spanEnd = numberSpan.end;
         }
         attribute.addToIndexes();
-        MedicationStrengthModifier modifier = new MedicationStrengthModifier(jCas, coveringSpan.begin, coveringSpan.end);
+        MedicationStrengthModifier modifier = new MedicationStrengthModifier(jCas, spanStart, spanEnd);
         modifier.setNormalizedForm(attribute);
         modifier.addToIndexes();
         idAnnotationMap.put(annotation.id, modifier);
@@ -1023,9 +1074,11 @@ public class SHARPKnowtatorXMLReader ext
       } else if ("Date".equals(annotation.type)) {
         String month = stringSlots.remove("month");
         String day = stringSlots.remove("day");
+        String year = stringSlots.remove("year");
         Date date = new Date(jCas);
         date.setMonth(month);
         date.setDay(day);
+        date.setYear(year);
         date.addToIndexes();
         TimeMention mention = new TimeMention(jCas, coveringSpan.begin, coveringSpan.end);
         mention.setDate(date);
@@ -1086,7 +1139,12 @@ public class SHARPKnowtatorXMLReader ext
 
     // all mentions should be added, so add features that required other annotations
     for (DelayedFeature delayedFeature : delayedFeatures) {
+      try{
       delayedFeature.setValueFrom(idAnnotationMap);
+      }catch(Exception e){
+        System.err.println("Exception reading input: " + e.getMessage());
+        e.printStackTrace(System.err);
+      }
     }
   }
   
@@ -1100,8 +1158,22 @@ public class SHARPKnowtatorXMLReader ext
     return String.format("%s(%s)", ann.getClass().getSimpleName(), result);
   }
   
+//  private static void addIdentifiedAnnotationFeatures(
+//      KnowtatorAnnotation annotation,
+//      final IdentifiedAnnotation mention,
+//      JCas jCas,
+//      int typeID,
+//      Map<String, String> stringSlots,
+//      Map<String, Boolean> booleanSlots,
+//      Map<String, KnowtatorAnnotation> annotationSlots,
+//      Map<String, TOP> idAnnotationMap,
+//      List<DelayedFeature> delayedFeatures) {
+//    addIdentifiedAnnotationFeatures(annotation, null, mention, jCas, typeID, stringSlots, booleanSlots, annotationSlots, idAnnotationMap, delayedFeatures, null);
+//  }
+
   private static void addIdentifiedAnnotationFeatures(
       KnowtatorAnnotation annotation,
+      URI knowtatorURI,
       final IdentifiedAnnotation mention,
       JCas jCas,
       int typeID,
@@ -1109,7 +1181,8 @@ public class SHARPKnowtatorXMLReader ext
       Map<String, Boolean> booleanSlots,
       Map<String, KnowtatorAnnotation> annotationSlots,
       Map<String, TOP> idAnnotationMap,
-      List<DelayedFeature> delayedFeatures) {
+      List<DelayedFeature> delayedFeatures,
+      List<DelayedRelation> delayedRelations) {
     mention.setTypeID(typeID);
     mention.setConfidence(1.0f);
     mention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
@@ -1129,6 +1202,15 @@ public class SHARPKnowtatorXMLReader ext
     delayedFeatures.add(new DelayedFeatureFromFeature(mention, "historyOf", historyOf));
     KnowtatorAnnotation negationIndicator = annotationSlots.remove("negation_indicator_CU");
     delayedFeatures.add(new DelayedFeatureFromFeature(mention, "polarity", negationIndicator));
+    if(negationIndicator != null){
+      DelayedRelation polRel = new DelayedRelation();
+      polRel.source = negationIndicator;
+      polRel.target = annotation;
+      polRel.type = "polarityModifier";
+      polRel.sourceFile = knowtatorURI;
+      if(delayedRelations != null) delayedRelations.add(polRel);
+    }
+//    delayedFeatures.add(DelayedRelationFeature.forArg2(mention, "polarityModifier", negationIndicator, BinaryTextRelation.class, PolarityModifier.class));
     KnowtatorAnnotation subject = annotationSlots.remove("subject_CU");
     delayedFeatures.add(new DelayedFeatureFromFeature(mention, "subject", subject) {
       @Override
@@ -1285,15 +1367,26 @@ public class SHARPKnowtatorXMLReader ext
       
       // add the relation to the CAS
       BinaryTextRelation relation = null;
-      if ("affects".equals(this.annotation.type)) {
+      if(this.annotation == null){
+        relation = new BinaryTextRelation(jCas);
+      } else if ("affects".equals(this.annotation.type)) {
         this.assertTypes(sourceMention, EventMention.class, targetMention, IdentifiedAnnotation.class);
         relation = new AffectsTextRelation(jCas);
       } else if ("complicates/disrupts".equals(this.annotation.type)) {
         this.assertTypes(sourceMention, EventMention.class, targetMention, EventMention.class);
         relation = new ComplicatesDisruptsTextRelation(jCas);
+      } else if ("contraindicates".equals(this.annotation.type)) {
+        this.assertTypes(sourceMention, IdentifiedAnnotation.class, targetMention, EventMention.class);
+        relation = new ContraindicatesTextRelation(jCas);
       } else if ("degree_of".equals(this.annotation.type)) {
         this.assertTypes(sourceMention, EventMention.class, targetMention, Modifier.class);
         relation = new DegreeOfTextRelation(jCas);
+      } else if ("diagnoses".equals(this.annotation.type)) {
+        this.assertTypes(sourceMention, EventMention.class, targetMention, IdentifiedAnnotation.class);
+        relation = new DiagnosesTextRelation(jCas);
+      } else if ("indicates".equals(this.annotation.type)) {
+        this.assertTypes(sourceMention, EventMention.class, targetMention, EventMention.class);
+        relation = new IndicatesTextRelation(jCas);
       } else if ("location_of".equals(this.annotation.type)) {
         if (!(targetMention instanceof AnatomicalSiteMention) && (sourceMention instanceof AnatomicalSiteMention)) {
           // fix reversed arguments in manual annotations
@@ -1314,9 +1407,15 @@ public class SHARPKnowtatorXMLReader ext
         this.assertTypes(sourceMention, EventMention.class, targetMention, EventMention.class);
         relation = new ManifestationOfTextRelation(jCas);
         relation.setCategory("manifestation_of"); // fix typo in Knowtator type system
+      } else if ("prevents".equals(this.annotation.type)) {
+        this.assertTypes(sourceMention, EventMention.class, targetMention, EventMention.class);
+        relation = new PreventsTextRelation(jCas);
       } else if ("result_of".equals(this.annotation.type)) {
         this.assertTypes(sourceMention, EventMention.class, targetMention, IdentifiedAnnotation.class);
         relation = new ResultOfTextRelation(jCas);
+//      } else if ("prevents".equals(this.annotation.type)) {
+//        this.assertTypes(sourceMention, expectedSourceClass, targetMention, expectedTargetClass);
+//        relation = new PreventsTextRelation(jCas);
       } else if ("TLINK".equals(this.annotation.type)) {
         relation = new TemporalTextRelation(jCas);
         relation.setCategory(this.type);
@@ -1329,7 +1428,11 @@ public class SHARPKnowtatorXMLReader ext
       
       // set the relation cateory (if not already set)
       if (relation.getCategory() == null) {
-        relation.setCategory(this.annotation.type);
+        if(this.type != null){
+          relation.setCategory(this.type);
+        }else{
+          relation.setCategory(this.annotation.type);
+        }
       }
       
       // link the relation to its arguments and add it to the CAS
@@ -1344,7 +1447,7 @@ public class SHARPKnowtatorXMLReader ext
       relation.addToIndexes();
     
       // add the relation to the map so it can be used in features of other annotations
-      idAnnotationMap.put(this.annotation.id, relation);
+      if(this.annotation != null) idAnnotationMap.put(this.annotation.id, relation);
     }
     
     private void assertTypes(Annotation sourceMention, Class<? extends Annotation> expectedSourceClass, Annotation targetMention, Class<? extends Annotation> expectedTargetClass) {

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java Fri Dec  5 19:18:05 2014
@@ -30,8 +30,11 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 
+import opennlp.tools.cmdline.sentdetect.SentenceDetectorCrossValidatorTool;
+import opennlp.tools.cmdline.sentdetect.SentenceEvaluationErrorListener;
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.sentdetect.DefaultSDContextGenerator;
+import opennlp.tools.sentdetect.SDCrossValidator;
 import opennlp.tools.sentdetect.SentenceDetectorFactory;
 import opennlp.tools.sentdetect.SentenceDetectorME;
 import opennlp.tools.sentdetect.SentenceModel;
@@ -42,8 +45,10 @@ import opennlp.tools.util.PlainTextByLin
 import opennlp.tools.util.TrainingParameters;
 
 import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.sentence.SDContextGeneratorCtakes;
 import org.apache.ctakes.core.sentence.EndOfSentenceScannerImpl;
 import org.apache.ctakes.core.sentence.SentenceDetectorCtakes;
+import org.apache.ctakes.core.sentence.SentenceDetectorFactoryCtakes;
 import org.apache.ctakes.core.sentence.SentenceSpan;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -114,7 +119,7 @@ public class SentenceDetector extends JC
 		  logger.info("Sentence detector model file: " + sdModelPath);
 		  sdmodel = new SentenceModel(is);
 		  EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
-		  DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
+		  SDContextGeneratorCtakes cg = new SDContextGeneratorCtakes(eoss.getEndOfSentenceCharacters());
 		  sentenceDetector = new SentenceDetectorCtakes(
 		      sdmodel.getMaxentModel(), cg, eoss);
 
@@ -324,23 +329,17 @@ public class SentenceDetector extends JC
 		  mlParams.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(iters));
 		  mlParams.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(cut));
 
-		  // Abbreviations dictionary
-		  // TODO: Actually import a Dictionary of abbreviations
-		  Dictionary dict = new Dictionary();
-
 		  try {
-		    SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
-		        "en", true, dict, scanner.getEndOfSentenceCharacters());
-		    mod = SentenceDetectorME.train("en", sampleStream, sdFactory, mlParams);
+		    SentenceDetectorFactoryCtakes sdFactory = new SentenceDetectorFactoryCtakes(scanner.getEndOfSentenceCharacters());
+		    mod = SentenceDetectorME.train("en", sampleStream, sdFactory, mlParams);		    
 		  } finally {
 		    sampleStream.close();
 		  }
 		}
-		
-		try(FileOutputStream outStream = new FileOutputStream(outFile)){
-		  logger.info("Saving the model as: " + outFile.getAbsolutePath());
-		  mod.serialize(outStream);
-		}
+    try(FileOutputStream outStream = new FileOutputStream(outFile)){
+      logger.info("Saving the model as: " + outFile.getAbsolutePath());
+      mod.serialize(outStream);
+    }		
 	}
 
 	public static void usage(Logger log) {

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionReader.java?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionReader.java (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FilesInDirectoryCollectionReader.java Fri Dec  5 19:18:05 2014
@@ -138,7 +138,7 @@ public class FilesInDirectoryCollectionR
 	    	File[] files = directory.listFiles();
 	    	for (int i = 0; i < files.length; i++)
 	    	{
-	    		if (!files[i].isDirectory() && hasValidExtension(files[i]))
+	    		if (!files[i].isDirectory() && hasValidExtension(files[i]) && !files[i].isHidden())
 	    		{
 	    			iv_files.add(files[i]);  
 	    		}

Added: ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SDContextGeneratorCtakes.java
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SDContextGeneratorCtakes.java?rev=1643405&view=auto
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SDContextGeneratorCtakes.java (added)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SDContextGeneratorCtakes.java Fri Dec  5 19:18:05 2014
@@ -0,0 +1,116 @@
+package org.apache.ctakes.core.sentence;
+
+import java.util.ArrayList;
+
+import opennlp.tools.sentdetect.DefaultSDContextGenerator;
+import opennlp.tools.util.StringUtil;
+
+/**
+ * Sentence-boundary context generator for clinical notes. In addition to the features of
+ * {@link DefaultSDContextGenerator}, it emits features for the line feeds that follow a candidate
+ * boundary and for the character "shape" of the surrounding tokens.
+ *
+ * <p>Not thread-safe: getContext() passes state to collectFeatures() through the {@code ws} field.
+ */
+public class SDContextGeneratorCtakes extends DefaultSDContextGenerator {
+
+  // Whitespace between the current candidate and the next non-whitespace character. Written by
+  // getContext() and read by collectFeatures(), which is expected to run inside super.getContext().
+  String ws = "";
+
+  /** @param eosCharacters end-of-sentence characters, handed unchanged to the parent class */
+  public SDContextGeneratorCtakes(char[] eosCharacters) {
+    super(eosCharacters);
+  }
+
+  /**
+   * Captures the whitespace that follows {@code position}, then delegates to the parent.
+   *
+   * @param sb the text being examined
+   * @param position index in {@code sb} of the candidate end-of-sentence character
+   * @return the feature array returned by the parent implementation
+   */
+  @Override
+  public String[] getContext(CharSequence sb, int position) {
+    int lastIndex = sb.length() - 1;
+    // Recompute ws for every candidate. If it were left untouched for a candidate at the last
+    // index, collectFeatures() would read the value captured for an earlier candidate, or hit a
+    // NullPointerException when no earlier candidate exists.
+    ws = position < lastIndex
+        ? sb.subSequence(position + 1, nextNonspaceIndex(sb, position, lastIndex)).toString()
+        : "";
+    return super.getContext(sb, position);
+  }
+
+  /** Renders a character for use in a feature string, spelling out line feed and carriage return. */
+  private static String escapeChar(Character c) {
+    switch (c) {
+      case '\n': return "<LF>";
+      case '\r': return "<CR>";
+      default: return c.toString();
+    }
+  }
+
+  /**
+   * Adds the cTAKES features after the parent's: {@code nbold} (next token is all upper case),
+   * {@code lfs=} (line feeds following the candidate), {@code eolws=} (candidate character plus
+   * those line feeds) and the collapsed shapes of {@code next}, {@code previous}, {@code prefix}.
+   * Argument meanings are those of {@link DefaultSDContextGenerator}'s collectFeatures.
+   */
+  @Override
+  protected void collectFeatures(String prefix, String suffix, String previous, String next, Character eosChar) {
+    super.collectFeatures(prefix, suffix, previous, next, eosChar);
+    if (!next.isEmpty() && isAllUpper(next)) {
+      collectFeats.add("nbold");
+    }
+
+    // Tabs, spaces and carriage returns are dropped, leaving one "<LF>" token per line feed.
+    String lfs = ws.replace("\n", "<LF>").replace("\t", "<SPACE>").replace(" ", "<SPACE>")
+        .replace("\r", "").replace("<SPACE>", "");
+    addFeature("lfs=", lfs);
+    addFeature("eolws=", escapeChar(eosChar) + ',' + lfs);
+    addFeature("collapsedNext=", getCollapsedShape(next));
+    // "collapasedPrev" is misspelled; renaming it would invalidate models trained with this name.
+    addFeature("collapasedPrev=", getCollapsedShape(previous));
+    addFeature("collapsedPrefix=", getCollapsedShape(prefix));
+  }
+
+  /** Appends {@code name + value} to the feature list, leaving the shared buffer empty. */
+  private void addFeature(String name, String value) {
+    buf.setLength(0);
+    buf.append(name).append(value);
+    collectFeats.add(buf.toString());
+    buf.setLength(0);
+  }
+
+  /** Returns true when every character of {@code s} is upper case (vacuously true for ""). */
+  private static boolean isAllUpper(String s) {
+    for (int i = 0; i < s.length(); i++) {
+      if (!Character.isUpperCase(s.charAt(i))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /** Maps ASCII upper-case, lower-case, digit and punctuation characters to U, L, D and P. */
+  private static String getShape(String s) {
+    return s.replaceAll("\\p{Upper}", "U").replaceAll("\\p{Lower}", "L")
+        .replaceAll("\\p{Digit}", "D").replaceAll("\\p{Punct}", "P");
+  }
+
+  /** Like {@code getShape}, but each run of a repeated character is collapsed to "c+". */
+  private static String getCollapsedShape(String s) {
+    return getShape(s).replaceAll("(.)\\1+", "$1+");
+  }
+
+  /** Index of the first non-whitespace character after {@code seek}, or {@code lastIndex} if none. */
+  private static int nextNonspaceIndex(CharSequence sb, int seek, int lastIndex) {
+    for (int i = seek + 1; i <= lastIndex; i++) {
+      if (!StringUtil.isWhitespace(sb.charAt(i))) {
+        return i;
+      }
+    }
+    return lastIndex;
+  }
+}

Modified: ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java?rev=1643405&r1=1643404&r2=1643405&view=diff
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java (original)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java Fri Dec  5 19:18:05 2014
@@ -29,11 +29,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import opennlp.model.MaxentModel;
 import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.GISModel;
-import opennlp.tools.ml.model.EventStream;
-import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.sentdetect.EndOfSentenceScanner;
 import opennlp.tools.sentdetect.SDContextGenerator;
 import opennlp.tools.sentdetect.SDEventStream;
@@ -42,7 +39,6 @@ import opennlp.tools.sentdetect.Sentence
 import opennlp.tools.sentdetect.SentenceSample;
 import opennlp.tools.sentdetect.SentenceSampleStream;
 import opennlp.tools.sentdetect.lang.Factory;
-import opennlp.tools.util.HashSumEventStream;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.StringUtil;
@@ -228,123 +224,12 @@ public class SentenceDetectorCtakes {
 	  protected boolean isAcceptableBreak(String s, int fromIndex, int candidateIndex) {
 	    return true;
 	  }
-	  /*
-	  public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
-	      boolean useTokenEnd, Dictionary abbreviations) throws IOException {
-	    return train(languageCode, samples, useTokenEnd, abbreviations,5,100);
-	  }
-	  
-	  public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
-	      boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException {
-
-	    Map<String, String> manifestInfoEntries = new HashMap<String, String>();
-	    ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations);
-	    
-	    Factory factory = new Factory();
-
-	    // TODO: Fix the EventStream to throw exceptions when training goes wrong
-	    ObjectStream eventStream = new SDEventStream(samples,
-	        factory.createSentenceContextGenerator(languageCode),
-	        factory.createEndOfSentenceScanner(languageCode));
-	    
-	    HashSumEventStream hses = new HashSumEventStream(eventStream);
-	    GISModel sentModel = GIS.trainModel(hses, iterations, cutoff);
-
-	    manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY, 
-	        hses.calculateHashSum().toString(16));
-	    
-	    return new SentenceModel(languageCode, sentModel,
-	        useTokenEnd, abbreviations, manifestInfoEntries);
-	  }
-*/
-	  private static void usage() {
-	    System.err.println("Usage: SentenceDetectorME -encoding charset -lang language trainData modelName [cutoff iterations]");
-	    System.err.println("-encoding charset specifies the encoding which should be used ");
-	    System.err.println("                  for reading and writing text.");
-	    System.err.println("-lang language    specifies the language which ");
-	    System.err.println("                  is being processed.");
-	    System.err.println("trainData         specifies the name of the input training file");
-	    System.err.println("                  to train the resulting model.");
-	    System.err.println("modelName         specifies the resulting saved model after");
-	    System.err.println("                  training.");
-	    System.exit(1);
-	  }
 
-	  /**
-	   * <p>Trains a new sentence detection model.</p>
-	   *
-	   * <p>Usage: opennlp.tools.sentdetect.SentenceDetectorME data_file new_model_name (iterations cutoff)?</p>
-	   *
-	   * @param args
-	   * @throws IOException
-	   */
-	  /*
-	  public static void main(String[] args) throws IOException {
-	    int ai=0;
-	    String encoding = null;
-	    String lang = null;
-	    if (args.length == 0) {
-	      usage();
-	    }
-	    while (args[ai].startsWith("-")) {
-	      if (args[ai].equals("-encoding")) {
-	        ai++;
-	        if (ai < args.length) {
-	          encoding = args[ai];
-	          ai++;
-	        }
-	        else {
-	          usage();
-	        }
-	      }
-	      else if (args[ai].equals("-lang")) {
-	        ai++;
-	        if (ai < args.length) {
-	          lang = args[ai];
-	          ai++;
-	        }
-	        else {
-	          usage();
-	        }
-	      }
-	      else {
-	        usage();
-	      }
-	    }
-
-	    File inFile = new File(args[ai++]);
-	    File outFile = new File(args[ai++]);
-
-	    int numberOfArgs = args.length;
-	    int iters = (ai < numberOfArgs ? convertToInt(args[ai++]) : 100);
-	    int cutoff = (ai < numberOfArgs ? convertToInt(args[ai++]) : 4);
-
-
-	    try {
-	      if ((lang == null) || (encoding == null)) {
-	        usage();
-	      }
-
-	      
-	      SentenceModel model = train(lang, new SentenceSampleStream(new PlainTextByLineStream(
-	          new InputStreamReader(new FileInputStream(inFile), encoding))), true, null, cutoff, iters);
-
-	      // TODO: add support for iterations and cutoff settings
-
-//	      if (args.length > ai)
-//	        mod = train(es, Integer.parseInt(args[ai++]), Integer.parseInt(args[ai++]));
-//	      else
-//	        mod = train(es, 100, 5);
-
-	      System.out.println("Saving the model as: " + outFile);
-	      model.serialize(new FileOutputStream(outFile));
-	    }
-	    catch (Exception e) {
-	      e.printStackTrace();
-	    }
-	  }
-
-*/
+	  // RE: Missing main method for training -- there were two versions -- one in here and one
+	  // in SentenceDetector.java, and the one in here was old so it was removed.
+	  // Please use the org.apache.ctakes.core.ae.SentenceDetector for training
+	  // sentence detector models in cTAKES.
+	  
 	private static int convertToInt(String s) {
 
 		int i = Integer.parseInt(s); 

Added: ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorFactoryCtakes.java
URL: http://svn.apache.org/viewvc/ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorFactoryCtakes.java?rev=1643405&view=auto
==============================================================================
--- ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorFactoryCtakes.java (added)
+++ ctakes/branches/sent-detector-newline-fix/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorFactoryCtakes.java Fri Dec  5 19:18:05 2014
@@ -0,0 +1,22 @@
+package org.apache.ctakes.core.sentence;
+
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.sentdetect.SDContextGenerator;
+import opennlp.tools.sentdetect.SentenceDetectorFactory;
+
+public class SentenceDetectorFactoryCtakes extends SentenceDetectorFactory {
+
+  /** No-arg constructor, required so OpenNLP can instantiate this factory through reflection. */
+  public SentenceDetectorFactoryCtakes() {}
+
+  /** Creates an "en" factory with an empty abbreviation {@link Dictionary} and the given EOS characters. */
+  public SentenceDetectorFactoryCtakes(char[] eosChars) {
+    super("en", true, new Dictionary(), eosChars);
+  }
+
+  /** Returns a new cTAKES context generator built from this factory's end-of-sentence characters. */
+  @Override
+  public SDContextGenerator getSDContextGenerator() {
+    return new SDContextGeneratorCtakes(getEOSCharacters());
+  }
+}



Mime
View raw message