ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1589260 - in /ctakes/trunk/ctakes-lvg: desc/analysis_engine/LvgAnnotator.xml src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java
Date Tue, 22 Apr 2014 19:32:19 GMT
Author: tmill
Date: Tue Apr 22 19:32:18 2014
New Revision: 1589260

URL: http://svn.apache.org/r1589260
Log:
CTAKES-295: Update LVG to use UIMAFit-style configuration parameters and resources -- changes
some arguments to be optional with sane default values.

Modified:
    ctakes/trunk/ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml
    ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java

Modified: ctakes/trunk/ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml?rev=1589260&r1=1589259&r2=1589260&view=diff
==============================================================================
--- ctakes/trunk/ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml (original)
+++ ctakes/trunk/ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml Tue Apr 22 19:32:18 2014
@@ -1,240 +1,240 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
-  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-  <primitive>true</primitive>
-  <annotatorImplementationName>org.apache.ctakes.lvg.ae.LvgAnnotator</annotatorImplementationName>
-  <analysisEngineMetaData>
-    <name>LVG Annotator</name>
-    <description/>
-    <version/>
-    <vendor/>
-    <configurationParameters>
-      <configurationParameter>
-        <name>UseSegments</name>
-        <description>Flag whether to use segments or full doc text.</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>SegmentsToSkip</name>
-        <description>Segments to skip.</description>
-        <type>String</type>
-        <multiValued>true</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>UseCmdCache</name>
-        <description>Flag whether to use LVG cache.</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>CmdCacheFileLocation</name>
-        <description>Location of LVG cache file containing LvgCmdApi data.</description>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>CmdCacheFrequencyCutoff</name>
-        <description>Cutoff frequency for items that get loaded into RAM from cache.</description>
-        <type>Integer</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>ExclusionSet</name>
-        <description>Set of words that LVG should not run on.</description>
-        <type>String</type>
-        <multiValued>true</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>XeroxTreebankMap</name>
-        <type>String</type>
-        <multiValued>true</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>PostLemmas</name>
-        <description>This parameter determines whether the feature lemmaEntries will
be populated for word annotations.</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>UseLemmaCache</name>
-        <description>This parameter determines whether a cache will be used to improve
perfomance of setting lemma entries.</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>LemmaCacheFileLocation</name>
-        <description>This parameter determines where the lemma cache is located.</description>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>LemmaCacheFrequencyCutoff</name>
-        <description>This parameter sets a threshold for the frequency of a lemma to
be loaded into the cache.</description>
-        <type>Integer</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-    </configurationParameters>
-    <configurationParameterSettings>
-      <nameValuePair>
-        <name>UseSegments</name>
-        <value>
-          <boolean>false</boolean>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>SegmentsToSkip</name>
-        <value>
-          <array>
-            <string/>
-          </array>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>UseCmdCache</name>
-        <value>
-          <boolean>false</boolean>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>CmdCacheFileLocation</name>
-        <value>
-          <string>org/apache/ctakes/lvg/2005_norm.voc</string>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>CmdCacheFrequencyCutoff</name>
-        <value>
-          <integer>20</integer>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>ExclusionSet</name>
-        <value>
-          <array>
-            <string>and</string>
-            <string>And</string>
-            <string>by</string>
-            <string>By</string>
-            <string>for</string>
-            <string>For</string>
-            <string>in</string>
-            <string>In</string>
-            <string>of</string>
-            <string>Of</string>
-            <string>on</string>
-            <string>On</string>
-            <string>the</string>
-            <string>The</string>
-            <string>to</string>
-            <string>To</string>
-            <string>with</string>
-            <string>With</string>
-          </array>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>XeroxTreebankMap</name>
-        <value>
-          <array>
-            <string>adj|JJ</string>
-            <string>adv|RB</string>
-            <string>aux|AUX</string>
-            <string>compl|CS</string>
-            <string>conj|CC</string>
-            <string>det|DET</string>
-            <string>modal|MD</string>
-            <string>noun|NN</string>
-            <string>prep|IN</string>
-            <string>pron|PRP</string>
-            <string>verb|VB</string>
-          </array>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>LemmaCacheFileLocation</name>
-        <value>
-          <string>/ctakes-lvg/2005_lemma.voc</string>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>UseLemmaCache</name>
-        <value>
-          <boolean>false</boolean>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>LemmaCacheFrequencyCutoff</name>
-        <value>
-          <integer>20</integer>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>PostLemmas</name>
-        <value>
-          <boolean>true</boolean>
-        </value>
-      </nameValuePair>
-    </configurationParameterSettings>
-<typeSystemDescription>
-<imports>
-<import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
-</imports>
-</typeSystemDescription>
-    <typePriorities/>
-    <fsIndexCollection/>
-    <capabilities>
-      <capability>
-        <inputs>
-          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
-        </inputs>
-        <outputs>
-          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
-        </outputs>
-        <languagesSupported/>
-      </capability>
-    </capabilities>
-    <operationalProperties>
-      <modifiesCas>true</modifiesCas>
-      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-      <outputsNewCASes>false</outputsNewCASes>
-    </operationalProperties>
-  </analysisEngineMetaData>
-  <externalResourceDependencies>
-    <externalResourceDependency>
-      <key>LvgCmdApi</key>
-      <description/>
-      <interfaceName>org.apache.ctakes.lvg.resource.LvgCmdApiResource</interfaceName>
-      <optional>false</optional>
-    </externalResourceDependency>
-  </externalResourceDependencies>
-  <resourceManagerConfiguration>
-    <externalResources>
-      <externalResource>
-        <name>LvgCmdApi</name>
-        <description/>
-        <fileResourceSpecifier>
-          <fileUrl>file:org/apache/ctakes/lvg/data/config/lvg.properties</fileUrl>
-        </fileResourceSpecifier>
-        <implementationName>org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl</implementationName>
-      </externalResource>
-    </externalResources>
-    <externalResourceBindings>
-      <externalResourceBinding>
-        <key>LvgCmdApi</key>
-        <resourceName>LvgCmdApi</resourceName>
-      </externalResourceBinding>
-    </externalResourceBindings>
-  </resourceManagerConfiguration>
-</taeDescription>
+<?xml version="1.0" encoding="UTF-8"?>
+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.ctakes.lvg.ae.LvgAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>LVG Annotator</name>
+    <description/>
+    <version/>
+    <vendor/>
+    <configurationParameters>
+      <configurationParameter>
+        <name>UseSegments</name>
+        <description>Flag whether to use segments or full doc text.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>SegmentsToSkip</name>
+        <description>Segments to skip.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>UseCmdCache</name>
+        <description>Flag whether to use LVG cache.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>CmdCacheFileLocation</name>
+        <description>Location of LVG cache file containing LvgCmdApi data.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>CmdCacheFrequencyCutoff</name>
+        <description>Cutoff frequency for items that get loaded into RAM from cache.</description>
+        <type>Integer</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ExclusionSet</name>
+        <description>Set of words that LVG should not run on.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>XeroxTreebankMap</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>PostLemmas</name>
+        <description>This parameter determines whether the feature lemmaEntries will
be populated for word annotations.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>UseLemmaCache</name>
+        <description>This parameter determines whether a cache will be used to improve
perfomance of setting lemma entries.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>LemmaCacheFileLocation</name>
+        <description>This parameter determines where the lemma cache is located.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>LemmaCacheFrequencyCutoff</name>
+        <description>This parameter sets a threshold for the frequency of a lemma to
be loaded into the cache.</description>
+        <type>Integer</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>UseSegments</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>SegmentsToSkip</name>
+        <value>
+          <array>
+            <string/>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>UseCmdCache</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>CmdCacheFileLocation</name>
+        <value>
+          <string>org/apache/ctakes/lvg/2005_norm.voc</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>CmdCacheFrequencyCutoff</name>
+        <value>
+          <integer>20</integer>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>ExclusionSet</name>
+        <value>
+          <array>
+            <string>and</string>
+            <string>And</string>
+            <string>by</string>
+            <string>By</string>
+            <string>for</string>
+            <string>For</string>
+            <string>in</string>
+            <string>In</string>
+            <string>of</string>
+            <string>Of</string>
+            <string>on</string>
+            <string>On</string>
+            <string>the</string>
+            <string>The</string>
+            <string>to</string>
+            <string>To</string>
+            <string>with</string>
+            <string>With</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>XeroxTreebankMap</name>
+        <value>
+          <array>
+            <string>adj|JJ</string>
+            <string>adv|RB</string>
+            <string>aux|AUX</string>
+            <string>compl|CS</string>
+            <string>conj|CC</string>
+            <string>det|DET</string>
+            <string>modal|MD</string>
+            <string>noun|NN</string>
+            <string>prep|IN</string>
+            <string>pron|PRP</string>
+            <string>verb|VB</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>LemmaCacheFileLocation</name>
+        <value>
+          <string>/ctakes-lvg/2005_lemma.voc</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>UseLemmaCache</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>LemmaCacheFrequencyCutoff</name>
+        <value>
+          <integer>20</integer>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>PostLemmas</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+<typeSystemDescription>
+<imports>
+<import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
+</imports>
+</typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
+        </inputs>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>LvgCmdApi</key>
+      <description/>
+      <interfaceName>org.apache.ctakes.lvg.resource.LvgCmdApiResource</interfaceName>
+      <optional>false</optional>
+    </externalResourceDependency>
+  </externalResourceDependencies>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>LvgCmdApi</name>
+        <description/>
+        <fileResourceSpecifier>
+          <fileUrl>file:org/apache/ctakes/lvg/data/config/lvg.properties</fileUrl>
+        </fileResourceSpecifier>
+        <implementationName>org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl</implementationName>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>LvgCmdApi</key>
+        <resourceName>LvgCmdApi</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
+</taeDescription>

Modified: ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java?rev=1589260&r1=1589259&r2=1589260&view=diff
==============================================================================
--- ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java (original)
+++ ctakes/trunk/ctakes-lvg/src/main/java/org/apache/ctakes/lvg/ae/LvgAnnotator.java Tue Apr
22 19:32:18 2014
@@ -18,12 +18,6 @@
  */
 package org.apache.ctakes.lvg.ae;
 
-import org.apache.ctakes.core.util.ListFactory;
-import org.apache.ctakes.lvg.resource.LvgCmdApiResource;
-import org.apache.ctakes.typesystem.type.syntax.Lemma;
-import org.apache.ctakes.typesystem.type.syntax.WordToken;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
-
 import gov.nih.nlm.nls.lvg.Api.LvgCmdApi;
 import gov.nih.nlm.nls.lvg.Api.LvgLexItemApi;
 import gov.nih.nlm.nls.lvg.Lib.Category;
@@ -44,14 +38,21 @@ import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.Vector;
 
+import org.apache.ctakes.core.util.ListFactory;
+import org.apache.ctakes.lvg.resource.LvgCmdApiResource;
+import org.apache.ctakes.typesystem.type.syntax.Lemma;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.JFSIndexRepository;
 import org.apache.uima.jcas.cas.FSList;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.descriptor.ExternalResource;
 
 /**
  * UIMA annotator that uses the UMLS LVG package to find the canonical form of
@@ -71,55 +72,138 @@ public class LvgAnnotator extends JCasAn
 	 * lemmaEntries will be populated for word annotations.
 	 */
 	public static final String PARAM_POST_LEMMAS = "PostLemmas";
+	@ConfigurationParameter(
+	    name = PARAM_POST_LEMMAS,
+	    mandatory = false,
+	    defaultValue =  "false",
+	    description = "Whether to extract the lexical variants and write to cas (creates large
files)"
+	    )
+  private boolean postLemmas;
+
 	/**
 	 * Value is "UseLemmaCache". This parameter determines whether a cache will
 	 * be used to improve performance of setting lemma entries.
 	 */
 	public static final String PARAM_USE_LEMMA_CACHE = "UseLemmaCache";
+	@ConfigurationParameter(
+	    name = PARAM_USE_LEMMA_CACHE,
+	    mandatory = false,
+	    defaultValue = "false",
+	    description = "Whether to use a cache for lemmas"
+	    )
+  private boolean useLemmaCache;
+
 	/**
 	 * Value is "LemmaCacheFileLocation". This parameter determines where the
 	 * lemma cache is located.
 	 */
 	public static final String PARAM_LEMMA_CACHE_FILE_LOCATION = "LemmaCacheFileLocation";
+	@ConfigurationParameter(
+	    name = PARAM_LEMMA_CACHE_FILE_LOCATION,
+	    mandatory = false,
+	    defaultValue = "/org/apache/ctakes/lvg/2005_lemma.voc",
+	    description = "Path to lemma cache file -- if useLemmaCache and postLemmas are true"
+	    )
+  private String lemmaCacheFileLocation=null;
+	
 	/**
 	 * Value is "LemmaCacheFrequencyCutoff". This parameter sets a threshold for
 	 * the frequency of a lemma to be loaded into the cache.
 	 */
 	public static final String PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF = "LemmaCacheFrequencyCutoff";
+	@ConfigurationParameter(
+	    name = PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF,
+	    mandatory = false,
+	    description = "Threshold for the frequency of a lemma to be loaded into the cache",
+	    defaultValue = "20"
+	    )
+  private int cmdCacheFreqCutoff;
+
+	public static final String PARAM_USE_SEGMENTS = "UseSegments";
+	@ConfigurationParameter(
+	    name = PARAM_USE_SEGMENTS,
+	    mandatory = false,
+	    defaultValue = "false",
+	    description = "Whether to use segments found in upstream cTAKES components"
+	    )
+	private boolean useSegments;
 
+	public static final String PARAM_SKIP_SEGMENTS = "SegmentsToSkip";
+	@ConfigurationParameter(
+	    name = PARAM_SKIP_SEGMENTS,
+	    mandatory = false,
+	    defaultValue = {},
+	    description = "Segment IDs to skip during processing"
+	    )
+  private String[] skipSegmentIDs;
+  private Set<String> skipSegmentsSet;
+	
+	public static final String PARAM_XT_MAP = "XeroxTreebankMap";
+	@ConfigurationParameter(
+	    name = PARAM_XT_MAP,
+	    mandatory = true,
+	    description = "Mapping from Xerox parts of speech to Treebank equivalents"
+	    )
+	private String[] xtMaps;
+  private Map<String, String> xeroxTreebankMap;
+	
+	public static final String PARAM_USE_CMD_CACHE = "UseCmdCache";
+	@ConfigurationParameter(
+	    name = PARAM_USE_CMD_CACHE,
+	    mandatory = false,
+	    defaultValue = "false",
+	    description = "Use cache to track canonical forms"
+	    )
+  private boolean useCmdCache;
+
+	public static final String PARAM_CMD_CACHE_FILE = "CmdCacheFileLocation";
+	@ConfigurationParameter(
+	    name = PARAM_CMD_CACHE_FILE,
+	    mandatory = false,
+	    defaultValue = "/org/apache/ctakes/lvg/2005_norm.voc",
+	    description = "File with stored cache of canonical forms"
+	    )
+  private String cmdCacheFileLocation;
+
+	public static final String PARAM_LEMMA_FREQ_CUTOFF = "CmdCacheFrequencyCutoff";
+	@ConfigurationParameter(
+	    name = PARAM_LEMMA_FREQ_CUTOFF,
+	    mandatory = false,
+	    description = "Minimum frequency required for loading from cache",
+	    defaultValue = "20"
+	    )
+  private int lemmaCacheFreqCutoff;
+
+	public static final String PARAM_EXCLUSION_WORDS = "ExclusionSet";
+	@ConfigurationParameter(
+	    name = PARAM_EXCLUSION_WORDS,
+	    mandatory = false,
+	    defaultValue = {"And", "and", "By", "by", "For", "for", "In", "in", "Of", "of", "On",
"on", "The", "the", "To", "to", "With", "with"},
+	    description = "Words to exclude when doing LVG normalization"
+	    )
+	String[] wordsToExclude;
+  private Set<String> exclusionSet;
+	
 	// LOG4J logger based on class name
 	private Logger logger = Logger.getLogger(getClass().getName());
 
-	private final String LVGCMDAPI_RESRC_KEY = "LvgCmdApi";
-
+	private final String PARAM_LVGCMDAPI_RESRC_KEY = "LvgCmdApi";
+  @ExternalResource(
+      key = PARAM_LVGCMDAPI_RESRC_KEY,
+      mandatory = true
+      )
+  private LvgCmdApiResource lvgResource;
+      
 	private LvgCmdApi lvgCmd;
 
 	private LvgLexItemApi lvgLexItem;
 
-	private UimaContext context;
-
-	private boolean useSegments;
-
-	private Set<String> skipSegmentsSet;
-
-	private boolean useCmdCache;
-	private String cmdCacheFileLocation;
-	private int cmdCacheFreqCutoff;
-
-	private Map<String, String> xeroxTreebankMap;
-
-	private boolean postLemmas;
-	private boolean useLemmaCache;
-	private String lemmaCacheFileLocation;
-	private int lemmaCacheFreqCutoff;
-
 	// key = word, value = canonical word
 	private Map<String, String> normCacheMap;
 
 	// key = word, value = Set of Lemma objects
 	private Map<String, Set<LemmaLocalClass>> lemmaCacheMap;
 
-	private Set<String> exclusionSet;
 
 	/**
 	 * Performs initialization logic. This implementation just reads values for
@@ -132,18 +216,9 @@ public class LvgAnnotator extends JCasAn
 			throws ResourceInitializationException {
 		super.initialize(aContext);
 
-		context = aContext;
-			configInit();
+		configInit();
 
 		try {
-			LvgCmdApiResource lvgResource = (LvgCmdApiResource) context
-					.getResourceObject(LVGCMDAPI_RESRC_KEY);
-
-			if (lvgResource == null)
-				throw new ResourceInitializationException(new Exception(
-						"Unable to locate resource with key="
-								+ LVGCMDAPI_RESRC_KEY + "."));
-
 			lvgCmd = lvgResource.getLvg();
 
 			if (useCmdCache) {
@@ -161,7 +236,7 @@ public class LvgAnnotator extends JCasAn
 				}
 			}
 
-		} catch (Exception e) {
+		} catch (IOException e) {
 			throw new ResourceInitializationException(e);
 		}
 	}
@@ -169,19 +244,13 @@ public class LvgAnnotator extends JCasAn
 	/**
 	 * Sets configuration parameters with values from the descriptor.
 	 */
-	private void configInit() throws ResourceInitializationException {
-		useSegments = ((Boolean) context.getConfigParameterValue("UseSegments"))
-				.booleanValue();
-		String[] skipSegmentIDs = (String[]) context
-				.getConfigParameterValue("SegmentsToSkip");
+	private void configInit() {
 		skipSegmentsSet = new HashSet<>();
 		for (int i = 0; i < skipSegmentIDs.length; i++) {
 			skipSegmentsSet.add(skipSegmentIDs[i]);
 		}
 
 		// Load Xerox Treebank tagset map
-		String xtMaps[] = (String[]) context
-				.getConfigParameterValue("XeroxTreebankMap");
 		xeroxTreebankMap = new HashMap<>();
 		for (int i = 0; i < xtMaps.length; i++) {
 			StringTokenizer tokenizer = new StringTokenizer(xtMaps[i], "|");
@@ -192,45 +261,10 @@ public class LvgAnnotator extends JCasAn
 			}
 		}
 
-		useCmdCache = ((Boolean) context.getConfigParameterValue("UseCmdCache"))
-				.booleanValue();
-
-		cmdCacheFileLocation = (String) context
-				.getConfigParameterValue("CmdCacheFileLocation");
-
-		cmdCacheFreqCutoff = ((Integer) context
-				.getConfigParameterValue("CmdCacheFrequencyCutoff")).intValue();
-
-		String[] wordsToExclude = (String[]) context
-				.getConfigParameterValue("ExclusionSet");
 		exclusionSet = new HashSet<>();
 		for (int i = 0; i < wordsToExclude.length; i++) {
 			exclusionSet.add(wordsToExclude[i]);
 		}
-
-		Boolean bPostLemmas = (Boolean) context
-				.getConfigParameterValue(PARAM_POST_LEMMAS);
-		postLemmas = bPostLemmas == null ? false : bPostLemmas.booleanValue();
-		if (postLemmas) {
-			Boolean useLemmaCacheParam = (Boolean) context
-					.getConfigParameterValue(PARAM_USE_LEMMA_CACHE);
-			this.useLemmaCache = (useLemmaCacheParam == null ? false : useLemmaCacheParam
-					.booleanValue());
-			if (useLemmaCache) {
-				lemmaCacheFileLocation = (String) context
-						.getConfigParameterValue(PARAM_LEMMA_CACHE_FILE_LOCATION);
-				if (lemmaCacheFileLocation == null)
-					throw new ResourceInitializationException(new Exception(
-							"Parameter for " + PARAM_LEMMA_CACHE_FILE_LOCATION
-									+ " was not set."));
-				Integer lemmaCacheFreqCutoffParam = (Integer) context
-						.getConfigParameterValue(PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF);
-				if (lemmaCacheFreqCutoffParam == null)
-					this.lemmaCacheFreqCutoff = 20;
-				else
-					this.lemmaCacheFreqCutoff = lemmaCacheFreqCutoffParam.intValue();
-			}
-		}
 	}
 
 	/**



Mime
View raw message