manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1726929 - in /manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp: Messages.java OpenNlpExtractor.java OpenNlpExtractorConfig.java
Date Wed, 27 Jan 2016 00:33:07 GMT
Author: kwright
Date: Wed Jan 27 00:33:06 2016
New Revision: 1726929

URL: http://svn.apache.org/viewvc?rev=1726929&view=rev
Log:
Use standard indents

Modified:
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java

Modified: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java?rev=1726929&r1=1726928&r2=1726929&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java Wed Jan 27 00:33:06 2016
@@ -24,116 +24,116 @@ import org.apache.manifoldcf.core.interf
 
 public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
 {
-    public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.opennlp.common";
-    public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.opennlp";
-    
-    /** Constructor - do no instantiate
-     */
-     protected Messages()
-     {
-     }
-     
-     public static String getString(Locale locale, String messageKey)
-     {
-       return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
-     }
-
-     public static String getAttributeString(Locale locale, String messageKey)
-     {
-       return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
-     }
-
-     public static String getBodyString(Locale locale, String messageKey)
-     {
-       return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
-     }
-
-     public static String getAttributeJavascriptString(Locale locale, String messageKey)
-     {
-       return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
-     }
-
-     public static String getBodyJavascriptString(Locale locale, String messageKey)
-     {
-       return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
-     }
-
-     public static String getString(Locale locale, String messageKey, Object[] args)
-     {
-       return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
-     }
-
-     public static String getAttributeString(Locale locale, String messageKey, Object[] args)
-     {
-       return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
-     }
-     
-     public static String getBodyString(Locale locale, String messageKey, Object[] args)
-     {
-       return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
-     }
-
-     public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
-     {
-       return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
-     }
-
-     public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
-     {
-       return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
-     }
-
-     // More general methods which allow bundlenames and class loaders to be specified.
-     
-     public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
-     {
-       return getString(Messages.class, bundleName, locale, messageKey, args);
-     }
-
-     public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
-     {
-       return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
-     }
-
-     public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
-     {
-       return getBodyString(Messages.class, bundleName, locale, messageKey, args);
-     }
-     
-     public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
-     {
-       return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
-     }
-
-     public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
-     {
-       return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
-     }
-
-     // Resource output
-     
-     public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
-       Map<String,String> substitutionParameters, boolean mapToUpperCase)
-       throws ManifoldCFException
-     {
-       outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
-         substitutionParameters,mapToUpperCase);
-     }
-     
-     public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
-       Map<String,String> substitutionParameters, boolean mapToUpperCase)
-       throws ManifoldCFException
-     {
-       outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
-         substitutionParameters,mapToUpperCase);
-     }
-
-     public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
-       Map<String,Object> contextObjects)
-       throws ManifoldCFException
-     {
-       outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
-         contextObjects);
-     }
+  public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.opennlp.common";
+  public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.opennlp";
+  
+  /** Constructor - do no instantiate
+   */
+   protected Messages()
+   {
+   }
+   
+   public static String getString(Locale locale, String messageKey)
+   {
+     return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+   }
+
+   public static String getAttributeString(Locale locale, String messageKey)
+   {
+     return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+   }
+
+   public static String getBodyString(Locale locale, String messageKey)
+   {
+     return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+   }
+
+   public static String getAttributeJavascriptString(Locale locale, String messageKey)
+   {
+     return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+   }
+
+   public static String getBodyJavascriptString(Locale locale, String messageKey)
+   {
+     return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+   }
+
+   public static String getString(Locale locale, String messageKey, Object[] args)
+   {
+     return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+   }
+
+   public static String getAttributeString(Locale locale, String messageKey, Object[] args)
+   {
+     return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+   }
+   
+   public static String getBodyString(Locale locale, String messageKey, Object[] args)
+   {
+     return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+   }
+
+   public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
+   {
+     return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+   }
+
+   public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
+   {
+     return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+   }
+
+   // More general methods which allow bundlenames and class loaders to be specified.
+   
+   public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
+   {
+     return getString(Messages.class, bundleName, locale, messageKey, args);
+   }
+
+   public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
+   {
+     return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
+   }
+
+   public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
+   {
+     return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+   }
+   
+   public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+   {
+     return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+   }
+
+   public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+   {
+     return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+   }
+
+   // Resource output
+   
+   public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
+     Map<String,String> substitutionParameters, boolean mapToUpperCase)
+     throws ManifoldCFException
+   {
+     outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+     substitutionParameters,mapToUpperCase);
+   }
+   
+   public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+     Map<String,String> substitutionParameters, boolean mapToUpperCase)
+     throws ManifoldCFException
+   {
+     outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+     substitutionParameters,mapToUpperCase);
+   }
+
+   public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+     Map<String,Object> contextObjects)
+     throws ManifoldCFException
+   {
+     outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+     contextObjects);
+   }
 
 }

Modified: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java?rev=1726929&r1=1726928&r2=1726929&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java Wed Jan 27 00:33:06 2016
@@ -46,468 +46,468 @@ import org.apache.manifoldcf.core.interf
 import org.apache.manifoldcf.core.interfaces.VersionContext;
 
 public class OpenNlpExtractor extends BaseTransformationConnector {
-	private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
-	private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
-	private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
-
-	// Meta-data fields added by this connector
-	private static final String PERSONS = "ner_people";
-	private static final String LOCATIONS = "ner_locations";
-	private static final String ORGANIZATIONS = "ner_organizations";
-
-	protected static final String ACTIVITY_EXTRACT = "extract";
-
-	protected static final String[] activitiesList = new String[] { ACTIVITY_EXTRACT };
-
-	/**
-	 * Return a list of activities that this connector generates. The connector
-	 * does NOT need to be connected before this method is called.
-	 * 
-	 * @return the set of activities.
-	 */
-	@Override
-	public String[] getActivitiesList() {
-		return activitiesList;
-	}
-
-	/**
-	 * Get a pipeline version string, given a pipeline specification object. The
-	 * version string is used to uniquely describe the pertinent details of the
-	 * specification and the configuration, to allow the Connector Framework to
-	 * determine whether a document will need to be processed again. Note that
-	 * the contents of any document cannot be considered by this method; only
-	 * configuration and specification information can be considered.
-	 * 
-	 * This method presumes that the underlying connector object has been
-	 * configured.
-	 * 
-	 * @param spec
-	 *            is the current pipeline specification object for this
-	 *            connection for the job that is doing the crawling.
-	 * @return a string, of unlimited length, which uniquely describes
-	 *         configuration and specification in such a way that if two such
-	 *         strings are equal, nothing that affects how or whether the
-	 *         document is indexed will be different.
-	 */
-	@Override
-	public VersionContext getPipelineDescription(Specification os) throws ManifoldCFException, ServiceInterruption {
-		SpecPacker sp = new SpecPacker(os);
-		return new VersionContext(sp.toPackedString(), params, os);
-	}
-
-	/**
-	 * Add (or replace) a document in the output data store using the connector.
-	 * This method presumes that the connector object has been configured, and
-	 * it is thus able to communicate with the output data store should that be
-	 * necessary. The OutputSpecification is *not* provided to this method,
-	 * because the goal is consistency, and if output is done it must be
-	 * consistent with the output description, since that was what was partly
-	 * used to determine if output should be taking place. So it may be
-	 * necessary for this method to decode an output description string in order
-	 * to determine what should be done.
-	 * 
-	 * @param documentURI
-	 *            is the URI of the document. The URI is presumed to be the
-	 *            unique identifier which the output data store will use to
-	 *            process and serve the document. This URI is constructed by the
-	 *            repository connector which fetches the document, and is thus
-	 *            universal across all output connectors.
-	 * @param outputDescription
-	 *            is the description string that was constructed for this
-	 *            document by the getOutputDescription() method.
-	 * @param document
-	 *            is the document data to be processed (handed to the output
-	 *            data store).
-	 * @param authorityNameString
-	 *            is the name of the authority responsible for authorizing any
-	 *            access tokens passed in with the repository document. May be
-	 *            null.
-	 * @param activities
-	 *            is the handle to an object that the implementer of a pipeline
-	 *            connector may use to perform operations, such as logging
-	 *            processing activity, or sending a modified document to the
-	 *            next stage in the pipeline.
-	 * @return the document status (accepted or permanently rejected).
-	 * @throws IOException
-	 *             only if there's a stream error reading the document data.
-	 */
-	@Override
-	public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription,
-			RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
-					throws ManifoldCFException, ServiceInterruption, IOException {
-		// assumes use of Tika extractor before using this connector
-		Logging.agents.debug("Starting OpenNlp extraction");
-
-		SpecPacker sp = new SpecPacker(pipelineDescription.getSpecification());
-
-		byte[] bytes = IOUtils.toByteArray(document.getBinaryStream());
-
-		SentenceDetector sentenceDetector = OpenNlpExtractorConfig.sentenceDetector(sp.getSModelPath());
-		Tokenizer tokenizer = OpenNlpExtractorConfig.tokenizer(sp.getTModelPath());
-		NameFinderME peopleFinder = OpenNlpExtractorConfig.peopleFinder(sp.getPModelPath());
-		NameFinderME locationFinder = OpenNlpExtractorConfig.locationFinder(sp.getLModelPath());
-		NameFinderME organizationFinder = OpenNlpExtractorConfig.organizationFinder(sp.getOModelPath());
-
-		// create a duplicate
-		RepositoryDocument docCopy = document.duplicate();
-		Map<String, List<String>> nerMap = new HashMap<>();
-
-		if (document.getBinaryLength() > 0) {
-			String textContent = new String(bytes, StandardCharsets.UTF_8);
-			List<String> peopleList = new ArrayList<>();
-			List<String> locationsList = new ArrayList<>();
-			List<String> organizationsList = new ArrayList<>();
-
-			String[] sentences = sentenceDetector.sentDetect(textContent);
-			for (String sentence : sentences) {
-				String[] tokens = tokenizer.tokenize(sentence);
-
-				Span[] spans = peopleFinder.find(tokens);
-				peopleList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
-
-				spans = locationFinder.find(tokens);
-				locationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
-
-				spans = organizationFinder.find(tokens);
-				organizationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
-
-			}
-
-			nerMap.put(PERSONS, peopleList);
-			nerMap.put(LOCATIONS, locationsList);
-			nerMap.put(ORGANIZATIONS, organizationsList);
-		}
-		// reset original stream
-		docCopy.setBinary(new ByteArrayInputStream(bytes), bytes.length);
-
-		// add named entity meta-data
-		if (!nerMap.isEmpty()) {
-			for (Entry<String, List<String>> entry : nerMap.entrySet()) {
-				List<String> neList = entry.getValue();
-				String[] neArray = neList.toArray(new String[neList.size()]);
-				docCopy.addField(entry.getKey(), neArray);
-			}
-		}
-
-		return activities.sendDocument(documentURI, docCopy);
-	}
-
-	// ////////////////////////
-	// UI Methods
-	// ////////////////////////
-
-	/**
-	 * Obtain the name of the form check javascript method to call.
-	 * 
-	 * @param connectionSequenceNumber
-	 *            is the unique number of this connection within the job.
-	 * @return the name of the form check javascript method.
-	 */
-	@Override
-	public String getFormCheckJavascriptMethodName(int connectionSequenceNumber) {
-		return "s" + connectionSequenceNumber + "_checkSpecification";
-	}
-
-	/**
-	 * Obtain the name of the form presave check javascript method to call.
-	 * 
-	 * @param connectionSequenceNumber
-	 *            is the unique number of this connection within the job.
-	 * @return the name of the form presave check javascript method.
-	 */
-	@Override
-	public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber) {
-		return "s" + connectionSequenceNumber + "_checkSpecificationForSave";
-	}
-
-	/**
-	 * Output the specification header section. This method is called in the
-	 * head section of a job page which has selected an output connection of the
-	 * current type. Its purpose is to add the required tabs to the list, and to
-	 * output any javascript methods that might be needed by the job editing
-	 * HTML.
-	 * 
-	 * @param out
-	 *            is the output to which any HTML should be sent.
-	 * @param locale
-	 *            is the preferred local of the output.
-	 * @param os
-	 *            is the current output specification for this job.
-	 * @param connectionSequenceNumber
-	 *            is the unique number of this connection within the job.
-	 * @param tabsArray
-	 *            is an array of tab names. Add to this array any tab names that
-	 *            are specific to the connector.
-	 */
-	@Override
-	public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
-			int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException {
-		Map<String, Object> paramMap = new HashMap<String, Object>();
-		paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
-
-		tabsArray.add(Messages.getString(locale, "OpenNlpExtractor.FieldMappingTabName"));
-
-		Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, paramMap);
-	}
-
-	/**
-	 * Output the specification body section. This method is called in the body
-	 * section of a job page which has selected an output connection of the
-	 * current type. Its purpose is to present the required form elements for
-	 * editing. The coder can presume that the HTML that is output from this
-	 * configuration will be within appropriate <html>, <body>, and <form> tags.
-	 * The name of the form is "editjob".
-	 * 
-	 * @param out
-	 *            is the output to which any HTML should be sent.
-	 * @param locale
-	 *            is the preferred local of the output.
-	 * @param os
-	 *            is the current output specification for this job.
-	 * @param connectionSequenceNumber
-	 *            is the unique number of this connection within the job.
-	 * @param actualSequenceNumber
-	 *            is the connection within the job that has currently been
-	 *            selected.
-	 * @param tabName
-	 *            is the current tab name.
-	 */
-	@Override
-	public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber,
-			int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException {
-		Map<String, Object> paramMap = new HashMap<String, Object>();
-
-		paramMap.put("TABNAME", tabName);
-		paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
-		paramMap.put("SELECTEDNUM", Integer.toString(actualSequenceNumber));
-
-		fillInFieldMappingSpecificationMap(paramMap, os);
-
-		Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_FIELDMAPPING_HTML, paramMap);
-	}
-
-	/**
-	 * Process a specification post. This method is called at the start of job's
-	 * edit or view page, whenever there is a possibility that form data for a
-	 * connection has been posted. Its purpose is to gather form information and
-	 * modify the output specification accordingly. The name of the posted form
-	 * is "editjob".
-	 * 
-	 * @param variableContext
-	 *            contains the post data, including binary file-upload
-	 *            information.
-	 * @param locale
-	 *            is the preferred local of the output.
-	 * @param os
-	 *            is the current output specification for this job.
-	 * @param connectionSequenceNumber
-	 *            is the unique number of this connection within the job.
-	 * @return null if all is well, or a string error message if there is an
-	 *         error that should prevent saving of the job (and cause a
-	 *         redirection to an error page).
-	 */
-	@Override
-	public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
-			int connectionSequenceNumber) throws ManifoldCFException {
-		String seqPrefix = "s" + connectionSequenceNumber + "_";
-
-		SpecificationNode node = new SpecificationNode(OpenNlpExtractorConfig.NODE_SMODEL_PATH);
-		String smodelPath = variableContext.getParameter(seqPrefix + "smodelpath");
-		if (smodelPath != null) {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, smodelPath);
-		} else {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
-		}
-		os.addChild(os.getChildCount(), node);
-
-		node = new SpecificationNode(OpenNlpExtractorConfig.NODE_TMODEL_PATH);
-		String tmodelPath = variableContext.getParameter(seqPrefix + "tmodelpath");
-		if (tmodelPath != null) {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, tmodelPath);
-		} else {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
-		}
-		os.addChild(os.getChildCount(), node);
-
-		node = new SpecificationNode(OpenNlpExtractorConfig.NODE_PMODEL_PATH);
-		String pmodelPath = variableContext.getParameter(seqPrefix + "pmodelpath");
-		if (pmodelPath != null) {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, pmodelPath);
-		} else {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
-		}
-		os.addChild(os.getChildCount(), node);
-
-		node = new SpecificationNode(OpenNlpExtractorConfig.NODE_LMODEL_PATH);
-		String lmodelPath = variableContext.getParameter(seqPrefix + "lmodelpath");
-		if (lmodelPath != null) {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, lmodelPath);
-		} else {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
-		}
-		os.addChild(os.getChildCount(), node);
-
-		node = new SpecificationNode(OpenNlpExtractorConfig.NODE_OMODEL_PATH);
-		String omodelPath = variableContext.getParameter(seqPrefix + "omodelpath");
-		if (omodelPath != null) {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, omodelPath);
-		} else {
-			node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
-		}
-		os.addChild(os.getChildCount(), node);
-
-		return null;
-	}
-
-	/**
-	 * View specification. This method is called in the body section of a job's
-	 * view page. Its purpose is to present the output specification information
-	 * to the user. The coder can presume that the HTML that is output from this
-	 * configuration will be within appropriate <html> and <body> tags.
-	 * 
-	 * @param out
-	 *            is the output to which any HTML should be sent.
-	 * @param locale
-	 *            is the preferred local of the output.
-	 * @param connectionSequenceNumber
-	 *            is the unique number of this connection within the job.
-	 * @param os
-	 *            is the current output specification for this job.
-	 */
-	@Override
-	public void viewSpecification(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber)
-			throws ManifoldCFException, IOException {
-		Map<String, Object> paramMap = new HashMap<String, Object>();
-		paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
-
-		fillInFieldMappingSpecificationMap(paramMap, os);
-		Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, paramMap);
-	}
-
-	protected static void fillInFieldMappingSpecificationMap(Map<String, Object> paramMap, Specification os) {
-		String sModelPath = "";
-		String tModelPath = "";
-		String pModelPath = "";
-		String lModelPath = "";
-		String oModelPath = "";
-
-		for (int i = 0; i < os.getChildCount(); i++) {
-			SpecificationNode sn = os.getChild(i);
-			if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
-				sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				if (sModelPath == null) {
-					sModelPath = "";
-				}
-			} else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
-				tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				if (tModelPath == null) {
-					tModelPath = "";
-				}
-			} else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
-				pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				if (pModelPath == null) {
-					pModelPath = "";
-				}
-			} else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
-				lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				if (lModelPath == null) {
-					lModelPath = "";
-				}
-			} else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
-				oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				if (oModelPath == null) {
-					oModelPath = "";
-				}
-			}
-
-		}
-		paramMap.put("SMODELPATH", sModelPath);
-		paramMap.put("TMODELPATH", tModelPath);
-		paramMap.put("PMODELPATH", pModelPath);
-		paramMap.put("LMODELPATH", lModelPath);
-		paramMap.put("OMODELPATH", oModelPath);
-	}
-
-	protected static class SpecPacker {
-
-		private final String sModelPath;
-		private final String tModelPath;
-		private final String pModelPath;
-		private final String lModelPath;
-		private final String oModelPath;
-
-		public SpecPacker(Specification os) {
-			String sModelPath = null;
-			String tModelPath = null;
-			String pModelPath = null;
-			String lModelPath = null;
-			String oModelPath = null;
-
-			for (int i = 0; i < os.getChildCount(); i++) {
-				SpecificationNode sn = os.getChild(i);
-
-				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
-					sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				}
-				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
-					tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				}
-				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
-					pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				}
-				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
-					lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				}
-				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
-					oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
-				}
-
-			}
-			this.sModelPath = sModelPath;
-			this.tModelPath = tModelPath;
-			this.pModelPath = pModelPath;
-			this.lModelPath = lModelPath;
-			this.oModelPath = oModelPath;
-		}
-
-		public String toPackedString() {
-			StringBuilder sb = new StringBuilder();
-
-			// extract nouns
-			if (sModelPath != null)
-				sb.append(sModelPath);
-			if (tModelPath != null)
-				sb.append(tModelPath);
-			if (pModelPath != null)
-				sb.append(pModelPath);
-			if (lModelPath != null)
-				sb.append(lModelPath);
-			if (oModelPath != null)
-				sb.append(oModelPath);
-
-			return sb.toString();
-		}
-
-		public String getSModelPath() {
-			return sModelPath;
-		}
-
-		public String getTModelPath() {
-			return tModelPath;
-		}
-
-		public String getPModelPath() {
-			return pModelPath;
-		}
-
-		public String getLModelPath() {
-			return lModelPath;
-		}
-
-		public String getOModelPath() {
-			return oModelPath;
-		}
+  private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+  private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
+  private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+
+  // Meta-data fields added by this connector
+  private static final String PERSONS = "ner_people";
+  private static final String LOCATIONS = "ner_locations";
+  private static final String ORGANIZATIONS = "ner_organizations";
+
+  protected static final String ACTIVITY_EXTRACT = "extract";
+
+  protected static final String[] activitiesList = new String[] { ACTIVITY_EXTRACT };
+
+  /**
+   * Return a list of activities that this connector generates. The connector
+   * does NOT need to be connected before this method is called.
+   * 
+   * @return the set of activities.
+   */
+  @Override
+  public String[] getActivitiesList() {
+    return activitiesList;
+  }
+
+  /**
+   * Get a pipeline version string, given a pipeline specification object. The
+   * version string is used to uniquely describe the pertinent details of the
+   * specification and the configuration, to allow the Connector Framework to
+   * determine whether a document will need to be processed again. Note that
+   * the contents of any document cannot be considered by this method; only
+   * configuration and specification information can be considered.
+   * 
+   * This method presumes that the underlying connector object has been
+   * configured.
+   * 
+   * @param os
+   *            is the current pipeline specification object for this
+   *            connection for the job that is doing the crawling.
+   * @return a string, of unlimited length, which uniquely describes
+   *         configuration and specification in such a way that if two such
+   *         strings are equal, nothing that affects how or whether the
+   *         document is indexed will be different.
+   */
+  @Override
+  public VersionContext getPipelineDescription(Specification os) throws ManifoldCFException, ServiceInterruption {
+    SpecPacker sp = new SpecPacker(os);
+    return new VersionContext(sp.toPackedString(), params, os);
+  }
+
+  /**
+   * Add (or replace) a document in the output data store using the connector.
+   * This method presumes that the connector object has been configured, and
+   * it is thus able to communicate with the output data store should that be
+   * necessary. The OutputSpecification is *not* provided to this method,
+   * because the goal is consistency, and if output is done it must be
+   * consistent with the output description, since that was what was partly
+   * used to determine if output should be taking place. So it may be
+   * necessary for this method to decode an output description string in order
+   * to determine what should be done.
+   * 
+   * @param documentURI
+   *            is the URI of the document. The URI is presumed to be the
+   *            unique identifier which the output data store will use to
+   *            process and serve the document. This URI is constructed by the
+   *            repository connector which fetches the document, and is thus
+   *            universal across all output connectors.
+   * @param pipelineDescription
+   *            is the version context that was constructed for this
+   *            document by the getPipelineDescription() method.
+   * @param document
+   *            is the document data to be processed (handed to the output
+   *            data store).
+   * @param authorityNameString
+   *            is the name of the authority responsible for authorizing any
+   *            access tokens passed in with the repository document. May be
+   *            null.
+   * @param activities
+   *            is the handle to an object that the implementer of a pipeline
+   *            connector may use to perform operations, such as logging
+   *            processing activity, or sending a modified document to the
+   *            next stage in the pipeline.
+   * @return the document status (accepted or permanently rejected).
+   * @throws IOException
+   *             only if there's a stream error reading the document data.
+   */
+  @Override
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription,
+      RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+          throws ManifoldCFException, ServiceInterruption, IOException {
+    // assumes use of Tika extractor before using this connector
+    Logging.agents.debug("Starting OpenNlp extraction");
+
+    SpecPacker sp = new SpecPacker(pipelineDescription.getSpecification());
+
+    byte[] bytes = IOUtils.toByteArray(document.getBinaryStream());
+
+    SentenceDetector sentenceDetector = OpenNlpExtractorConfig.sentenceDetector(sp.getSModelPath());
+    Tokenizer tokenizer = OpenNlpExtractorConfig.tokenizer(sp.getTModelPath());
+    NameFinderME peopleFinder = OpenNlpExtractorConfig.peopleFinder(sp.getPModelPath());
+    NameFinderME locationFinder = OpenNlpExtractorConfig.locationFinder(sp.getLModelPath());
+    NameFinderME organizationFinder = OpenNlpExtractorConfig.organizationFinder(sp.getOModelPath());
+
+    // create a duplicate
+    RepositoryDocument docCopy = document.duplicate();
+    Map<String, List<String>> nerMap = new HashMap<>();
+
+    if (document.getBinaryLength() > 0) {
+      String textContent = new String(bytes, StandardCharsets.UTF_8);
+      List<String> peopleList = new ArrayList<>();
+      List<String> locationsList = new ArrayList<>();
+      List<String> organizationsList = new ArrayList<>();
+
+      String[] sentences = sentenceDetector.sentDetect(textContent);
+      for (String sentence : sentences) {
+        String[] tokens = tokenizer.tokenize(sentence);
+
+        Span[] spans = peopleFinder.find(tokens);
+        peopleList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+
+        spans = locationFinder.find(tokens);
+        locationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+
+        spans = organizationFinder.find(tokens);
+        organizationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+
+      }
+
+      nerMap.put(PERSONS, peopleList);
+      nerMap.put(LOCATIONS, locationsList);
+      nerMap.put(ORGANIZATIONS, organizationsList);
+    }
+    // reset original stream
+    docCopy.setBinary(new ByteArrayInputStream(bytes), bytes.length);
+
+    // add named entity meta-data
+    if (!nerMap.isEmpty()) {
+      for (Entry<String, List<String>> entry : nerMap.entrySet()) {
+        List<String> neList = entry.getValue();
+        String[] neArray = neList.toArray(new String[neList.size()]);
+        docCopy.addField(entry.getKey(), neArray);
+      }
+    }
+
+    return activities.sendDocument(documentURI, docCopy);
+  }
+
+  // ////////////////////////
+  // UI Methods
+  // ////////////////////////
+
+  /**
+   * Obtain the name of the form check javascript method to call.
+   * 
+   * @param connectionSequenceNumber
+   *            is the unique number of this connection within the job.
+   * @return the name of the form check javascript method.
+   */
+  @Override
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber) {
+    return "s" + connectionSequenceNumber + "_checkSpecification";
+  }
+
+  /**
+   * Obtain the name of the form presave check javascript method to call.
+   * 
+   * @param connectionSequenceNumber
+   *            is the unique number of this connection within the job.
+   * @return the name of the form presave check javascript method.
+   */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber) {
+    return "s" + connectionSequenceNumber + "_checkSpecificationForSave";
+  }
+
+  /**
+   * Output the specification header section. This method is called in the
+   * head section of a job page which has selected an output connection of the
+   * current type. Its purpose is to add the required tabs to the list, and to
+   * output any javascript methods that might be needed by the job editing
+   * HTML.
+   * 
+   * @param out
+   *            is the output to which any HTML should be sent.
+   * @param locale
+   *            is the preferred locale of the output.
+   * @param os
+   *            is the current output specification for this job.
+   * @param connectionSequenceNumber
+   *            is the unique number of this connection within the job.
+   * @param tabsArray
+   *            is an array of tab names. Add to this array any tab names that
+   *            are specific to the connector.
+   */
+  @Override
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+      int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+    paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+
+    tabsArray.add(Messages.getString(locale, "OpenNlpExtractor.FieldMappingTabName"));
+
+    Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, paramMap);
+  }
+
+  /**
+   * Output the specification body section. This method is called in the body
+   * section of a job page which has selected an output connection of the
+   * current type. Its purpose is to present the required form elements for
+   * editing. The coder can presume that the HTML that is output from this
+   * configuration will be within appropriate <html>, <body>, and <form> tags.
+   * The name of the form is "editjob".
+   * 
+   * @param out
+   *            is the output to which any HTML should be sent.
+   * @param locale
+   *            is the preferred locale of the output.
+   * @param os
+   *            is the current output specification for this job.
+   * @param connectionSequenceNumber
+   *            is the unique number of this connection within the job.
+   * @param actualSequenceNumber
+   *            is the connection within the job that has currently been
+   *            selected.
+   * @param tabName
+   *            is the current tab name.
+   */
+  @Override
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber,
+      int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+
+    paramMap.put("TABNAME", tabName);
+    paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+    paramMap.put("SELECTEDNUM", Integer.toString(actualSequenceNumber));
+
+    fillInFieldMappingSpecificationMap(paramMap, os);
+
+    Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_FIELDMAPPING_HTML, paramMap);
+  }
+
+  /**
+   * Process a specification post. This method is called at the start of job's
+   * edit or view page, whenever there is a possibility that form data for a
+   * connection has been posted. Its purpose is to gather form information and
+   * modify the output specification accordingly. The name of the posted form
+   * is "editjob".
+   * 
+   * @param variableContext
+   *            contains the post data, including binary file-upload
+   *            information.
+   * @param locale
+   *            is the preferred locale of the output.
+   * @param os
+   *            is the current output specification for this job.
+   * @param connectionSequenceNumber
+   *            is the unique number of this connection within the job.
+   * @return null if all is well, or a string error message if there is an
+   *         error that should prevent saving of the job (and cause a
+   *         redirection to an error page).
+   */
+  @Override
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+      int connectionSequenceNumber) throws ManifoldCFException {
+    String seqPrefix = "s" + connectionSequenceNumber + "_";
+
+    SpecificationNode node = new SpecificationNode(OpenNlpExtractorConfig.NODE_SMODEL_PATH);
+    String smodelPath = variableContext.getParameter(seqPrefix + "smodelpath");
+    if (smodelPath != null) {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, smodelPath);
+    } else {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
+    }
+    os.addChild(os.getChildCount(), node);
+
+    node = new SpecificationNode(OpenNlpExtractorConfig.NODE_TMODEL_PATH);
+    String tmodelPath = variableContext.getParameter(seqPrefix + "tmodelpath");
+    if (tmodelPath != null) {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, tmodelPath);
+    } else {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
+    }
+    os.addChild(os.getChildCount(), node);
+
+    node = new SpecificationNode(OpenNlpExtractorConfig.NODE_PMODEL_PATH);
+    String pmodelPath = variableContext.getParameter(seqPrefix + "pmodelpath");
+    if (pmodelPath != null) {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, pmodelPath);
+    } else {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
+    }
+    os.addChild(os.getChildCount(), node);
+
+    node = new SpecificationNode(OpenNlpExtractorConfig.NODE_LMODEL_PATH);
+    String lmodelPath = variableContext.getParameter(seqPrefix + "lmodelpath");
+    if (lmodelPath != null) {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, lmodelPath);
+    } else {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
+    }
+    os.addChild(os.getChildCount(), node);
+
+    node = new SpecificationNode(OpenNlpExtractorConfig.NODE_OMODEL_PATH);
+    String omodelPath = variableContext.getParameter(seqPrefix + "omodelpath");
+    if (omodelPath != null) {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, omodelPath);
+    } else {
+      node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
+    }
+    os.addChild(os.getChildCount(), node);
+
+    return null;
+  }
+
+  /**
+   * View specification. This method is called in the body section of a job's
+   * view page. Its purpose is to present the output specification information
+   * to the user. The coder can presume that the HTML that is output from this
+   * configuration will be within appropriate <html> and <body> tags.
+   * 
+   * @param out
+   *            is the output to which any HTML should be sent.
+   * @param locale
+   *            is the preferred locale of the output.
+   * @param connectionSequenceNumber
+   *            is the unique number of this connection within the job.
+   * @param os
+   *            is the current output specification for this job.
+   */
+  @Override
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber)
+      throws ManifoldCFException, IOException {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+    paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+
+    fillInFieldMappingSpecificationMap(paramMap, os);
+    Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, paramMap);
+  }
+
+  protected static void fillInFieldMappingSpecificationMap(Map<String, Object> paramMap, Specification os) {
+    String sModelPath = "";
+    String tModelPath = "";
+    String pModelPath = "";
+    String lModelPath = "";
+    String oModelPath = "";
+
+    for (int i = 0; i < os.getChildCount(); i++) {
+      SpecificationNode sn = os.getChild(i);
+      if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+        sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        if (sModelPath == null) {
+          sModelPath = "";
+        }
+      } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+        tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        if (tModelPath == null) {
+          tModelPath = "";
+        }
+      } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+        pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        if (pModelPath == null) {
+          pModelPath = "";
+        }
+      } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+        lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        if (lModelPath == null) {
+          lModelPath = "";
+        }
+      } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+        oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        if (oModelPath == null) {
+          oModelPath = "";
+        }
+      }
+
+    }
+    paramMap.put("SMODELPATH", sModelPath);
+    paramMap.put("TMODELPATH", tModelPath);
+    paramMap.put("PMODELPATH", pModelPath);
+    paramMap.put("LMODELPATH", lModelPath);
+    paramMap.put("OMODELPATH", oModelPath);
+  }
+
+  protected static class SpecPacker {
+
+    private final String sModelPath;
+    private final String tModelPath;
+    private final String pModelPath;
+    private final String lModelPath;
+    private final String oModelPath;
+
+    public SpecPacker(Specification os) {
+      String sModelPath = null;
+      String tModelPath = null;
+      String pModelPath = null;
+      String lModelPath = null;
+      String oModelPath = null;
+
+      for (int i = 0; i < os.getChildCount(); i++) {
+        SpecificationNode sn = os.getChild(i);
+
+        if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+          sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        }
+        if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+          tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        }
+        if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+          pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        }
+        if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+          lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        }
+        if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+          oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+        }
+
+      }
+      this.sModelPath = sModelPath;
+      this.tModelPath = tModelPath;
+      this.pModelPath = pModelPath;
+      this.lModelPath = lModelPath;
+      this.oModelPath = oModelPath;
+    }
+
+    public String toPackedString() {
+      StringBuilder sb = new StringBuilder();
+
+      // concatenate the configured model paths (null paths are skipped)
+      if (sModelPath != null)
+        sb.append(sModelPath);
+      if (tModelPath != null)
+        sb.append(tModelPath);
+      if (pModelPath != null)
+        sb.append(pModelPath);
+      if (lModelPath != null)
+        sb.append(lModelPath);
+      if (oModelPath != null)
+        sb.append(oModelPath);
+
+      return sb.toString();
+    }
+
+    public String getSModelPath() {
+      return sModelPath;
+    }
+
+    public String getTModelPath() {
+      return tModelPath;
+    }
+
+    public String getPModelPath() {
+      return pModelPath;
+    }
+
+    public String getLModelPath() {
+      return lModelPath;
+    }
+
+    public String getOModelPath() {
+      return oModelPath;
+    }
 
-	}
+  }
 
 }

Modified: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java?rev=1726929&r1=1726928&r2=1726929&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java Wed Jan 27 00:33:06 2016
@@ -32,11 +32,11 @@ import opennlp.tools.util.InvalidFormatE
 
 public class OpenNlpExtractorConfig
 {
-	private static enum MODEL{
-		SENTENCE, TOKENIZER, PEOPLE, LOCATIONS, ORGANIZATIONS;
-	}
-	
-	// Specification nodes and values
+  private static enum MODEL{
+    SENTENCE, TOKENIZER, PEOPLE, LOCATIONS, ORGANIZATIONS;
+  }
+  
+  // Specification nodes and values
     public static final String NODE_SMODEL_PATH = "SModelPath";
     public static final String NODE_TMODEL_PATH = "TModelPath";
     public static final String NODE_PMODEL_PATH = "PModelPath";
@@ -52,45 +52,45 @@ public class OpenNlpExtractorConfig
     private static TokenNameFinderModel oModel = null;
     
     private static synchronized void initializeModel(MODEL m, String path) throws InvalidFormatException, FileNotFoundException, IOException{
-    	if(sModel == null && m == MODEL.SENTENCE)
-    		sModel = new SentenceModel(new FileInputStream(path));
-    	if(tModel == null && m == MODEL.TOKENIZER)
-    		tModel = new TokenizerModel(new FileInputStream(path));
-    	if(pModel == null && m == MODEL.PEOPLE)
-    		pModel = new TokenNameFinderModel(new FileInputStream(path));
-    	if(lModel == null && m == MODEL.LOCATIONS)
-    		lModel = new TokenNameFinderModel(new FileInputStream(path));
-    	if(oModel == null && m == MODEL.ORGANIZATIONS)
-    		oModel = new TokenNameFinderModel(new FileInputStream(path));
+      if(sModel == null && m == MODEL.SENTENCE)
+        sModel = new SentenceModel(new FileInputStream(path));
+      if(tModel == null && m == MODEL.TOKENIZER)
+        tModel = new TokenizerModel(new FileInputStream(path));
+      if(pModel == null && m == MODEL.PEOPLE)
+        pModel = new TokenNameFinderModel(new FileInputStream(path));
+      if(lModel == null && m == MODEL.LOCATIONS)
+        lModel = new TokenNameFinderModel(new FileInputStream(path));
+      if(oModel == null && m == MODEL.ORGANIZATIONS)
+        oModel = new TokenNameFinderModel(new FileInputStream(path));
     }
     
     public static final SentenceDetector sentenceDetector(String path) throws InvalidFormatException, FileNotFoundException, IOException{
-    	if(sModel == null)
-    		initializeModel(MODEL.SENTENCE, path);
+      if(sModel == null)
+        initializeModel(MODEL.SENTENCE, path);
         return new SentenceDetectorME(sModel);
     }
     
     public static final Tokenizer tokenizer(String path) throws InvalidFormatException, FileNotFoundException, IOException{
-    	if(tModel == null)
-    		initializeModel(MODEL.TOKENIZER, path);
+      if(tModel == null)
+        initializeModel(MODEL.TOKENIZER, path);
         return new TokenizerME(tModel);
     }
     
     public static final NameFinderME peopleFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
-    	if(pModel == null)
-    		initializeModel(MODEL.PEOPLE, path);
+      if(pModel == null)
+        initializeModel(MODEL.PEOPLE, path);
         return new NameFinderME(pModel);
     }
     
     public static final NameFinderME locationFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
-    	if(lModel == null)
-    		initializeModel(MODEL.LOCATIONS, path);
+      if(lModel == null)
+        initializeModel(MODEL.LOCATIONS, path);
         return new NameFinderME(lModel);
     }
     
     public static final NameFinderME organizationFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
-    	if(oModel == null)
-    		initializeModel(MODEL.ORGANIZATIONS, path);
+      if(oModel == null)
+        initializeModel(MODEL.ORGANIZATIONS, path);
         return new NameFinderME(oModel);
     }
 



Mime
View raw message