nifi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mcgil...@apache.org
Subject [08/47] incubator-nifi git commit: NIFI-399 initial port
Date Mon, 23 Mar 2015 12:23:39 GMT
NIFI-399 initial port


Project: http://git-wip-us.apache.org/repos/asf/incubator-nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-nifi/commit/ad18853b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-nifi/tree/ad18853b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-nifi/diff/ad18853b

Branch: refs/heads/NIFI-250
Commit: ad18853b589d80331e2f4574bce35d79bce09c28
Parents: eb5ec70
Author: joewitt <joewitt@apache.org>
Authored: Wed Mar 18 10:59:13 2015 -0400
Committer: joewitt <joewitt@apache.org>
Committed: Thu Mar 19 10:05:19 2015 -0400

----------------------------------------------------------------------
 .../standard/EvaluateRegularExpression.java     |   7 +-
 .../nifi/processors/standard/ExtractText.java   | 294 +++++++++++++++++
 .../org.apache.nifi.processor.Processor         |   1 +
 .../standard/TestEvaluateRegularExpression.java | 319 -------------------
 .../processors/standard/TestExtractText.java    | 319 +++++++++++++++++++
 5 files changed, 618 insertions(+), 322 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/ad18853b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java
----------------------------------------------------------------------
diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java
index 4140943..bb2e31a 100644
--- a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java
+++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java
@@ -52,9 +52,10 @@ import org.apache.commons.lang3.StringUtils;
 @EventDriven
 @SideEffectFree
 @SupportsBatching
-@Tags({"evaluate", "Text", "Regular Expression", "regex", "experimental"})
+@Tags({"deprecated"})
 @CapabilityDescription(
-        "Evaluates one or more Regular Expressions against the content of a FlowFile.  "
+        "WARNING: This has been deprecated and will be removed in 0.2.0.  Use ExtractText instead.\n" 
+        + "Evaluates one or more Regular Expressions against the content of a FlowFile.  "
         + "The results of those Regular Expressions are assigned to FlowFile Attributes.  "
         + "Regular Expressions are entered by adding user-defined properties; "
         + "the name of the property maps to the Attribute Name into which the result will be placed.  "
@@ -62,7 +63,7 @@ import org.apache.commons.lang3.StringUtils;
         + "If the Regular Expression matches more than once, only the first match will be used.  "
         + "If any provided Regular Expression matches, the FlowFile(s) will be routed to 'matched'. "
         + "If no provided Regular Expression matches, the FlowFile will be routed to 'unmatched' and no attributes will be applied to the FlowFile.")
-
+@Deprecated
 public class EvaluateRegularExpression extends AbstractProcessor {
 
     public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder()

http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/ad18853b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
----------------------------------------------------------------------
diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
new file mode 100644
index 0000000..6c914d8
--- /dev/null
+++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.standard;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.stream.io.StreamUtils;
+import org.apache.nifi.logging.ProcessorLog;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.DataUnit;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.util.StandardValidators;
+
+import org.apache.commons.lang3.StringUtils;
+
+@EventDriven
+@SideEffectFree
+@SupportsBatching
+@Tags({"evaluate", "extract", "Text", "Regular Expression", "regex"})
+@CapabilityDescription(
+        "Evaluates one or more Regular Expressions against the content of a FlowFile.  "
+        + "The results of those Regular Expressions are assigned to FlowFile Attributes.  "
+        + "Regular Expressions are entered by adding user-defined properties; "
+        + "the name of the property maps to the Attribute Name into which the result will be placed.  "
+        + "The value of the property must be a valid Regular Expressions with exactly one capturing group.  "
+        + "If the Regular Expression matches more than once, only the first match will be used.  "
+        + "If any provided Regular Expression matches, the FlowFile(s) will be routed to 'matched'. "
+        + "If no provided Regular Expression matches, the FlowFile will be routed to 'unmatched' and no attributes will be applied to the FlowFile.")
+
+public class ExtractText extends AbstractProcessor {
+
+    public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder()
+            .name("Character Set")
+            .description("The Character Set in which the file is encoded")
+            .required(true)
+            .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+            .defaultValue("UTF-8")
+            .build();
+
+    public static final PropertyDescriptor MAX_BUFFER_SIZE = new PropertyDescriptor.Builder()
+            .name("Maximum Buffer Size")
+            .description("Specifies the maximum amount of data to buffer (per file) in order to apply the regular expressions.  Files larger than the specified maximum will not be fully evaluated.")
+            .required(true)
+            .addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
+            .defaultValue("1 MB")
+            .build();
+
+    public static final PropertyDescriptor CANON_EQ = new PropertyDescriptor.Builder()
+            .name("Enable Canonical Equivalence")
+            .description("Indicates that two characters match only when their full canonical decompositions match.")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor CASE_INSENSITIVE = new PropertyDescriptor.Builder()
+            .name("Enable Case-insensitive Matching")
+            .description("Indicates that two characters match even if they are in a different case.  Can also be specified via the embedded flag (?i).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor COMMENTS = new PropertyDescriptor.Builder()
+            .name("Permit Whitespace and Comments in Pattern")
+            .description("In this mode, whitespace is ignored, and embedded comments starting with # are ignored until the end of a line.  Can also be specified via the embedded flag (?x).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor DOTALL = new PropertyDescriptor.Builder()
+            .name("Enable DOTALL Mode")
+            .description("Indicates that the expression '.' should match any character, including a line terminator.  Can also be specified via the embedded flag (?s).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor LITERAL = new PropertyDescriptor.Builder()
+            .name("Enable Literal Parsing of the Pattern")
+            .description("Indicates that Metacharacters and escape characters should be given no special meaning.")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor MULTILINE = new PropertyDescriptor.Builder()
+            .name("Enable Multiline Mode")
+            .description("Indicates that '^' and '$' should match just after and just before a line terminator or end of sequence, instead of only the beginning or end of the entire input.  Can also be specified via the embedded flag (?m).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor UNICODE_CASE = new PropertyDescriptor.Builder()
+            .name("Enable Unicode-aware Case Folding")
+            .description("When used with 'Enable Case-insensitive Matching', matches in a manner consistent with the Unicode Standard.  Can also be specified via the embedded flag (?u).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor UNICODE_CHARACTER_CLASS = new PropertyDescriptor.Builder()
+            .name("Enable Unicode Predefined Character Classes")
+            .description("Specifies conformance with the Unicode Technical Standard #18: Unicode Regular Expression Annex C: Compatibility Properties.  Can also be specified via the embedded flag (?U).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor UNIX_LINES = new PropertyDescriptor.Builder()
+            .name("Enable Unix Lines Mode")
+            .description("Indicates that only the '\\n' line terminator is recognized in the behavior of '.', '^', and '$'.  Can also be specified via the embedded flag (?d).") // backslash is escaped so the text shows \n instead of containing a real line feed
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final Relationship REL_MATCH = new Relationship.Builder()
+            .name("matched")
+            .description(
+                    "FlowFiles are routed to this relationship when the Regular Expression is successfully evaluated and the FlowFile "
+                    + "is modified as a result")
+            .build();
+
+    public static final Relationship REL_NO_MATCH = new Relationship.Builder()
+            .name("unmatched")
+            .description(
+                    "FlowFiles are routed to this relationship when no provided Regular Expression matches the content of the FlowFile")
+            .build();
+
+    private Set<Relationship> relationships; // unmodifiable set, assigned in init()
+    private List<PropertyDescriptor> properties; // unmodifiable list, assigned in init()
+
+    /** Assembles the fixed relationship set and the ordered property list, stored as unmodifiable views. */
+    @Override
+    protected void init(final ProcessorInitializationContext context) {
+        final List<PropertyDescriptor> supported = new ArrayList<>();
+        supported.add(CHARACTER_SET);
+        supported.add(MAX_BUFFER_SIZE);
+        supported.add(CANON_EQ);
+        supported.add(CASE_INSENSITIVE);
+        supported.add(COMMENTS);
+        supported.add(DOTALL);
+        supported.add(LITERAL);
+        supported.add(MULTILINE);
+        supported.add(UNICODE_CASE);
+        supported.add(UNICODE_CHARACTER_CLASS);
+        supported.add(UNIX_LINES);
+        this.properties = Collections.unmodifiableList(supported);
+        final Set<Relationship> routes = new HashSet<>();
+        routes.add(REL_MATCH);
+        routes.add(REL_NO_MATCH);
+        this.relationships = Collections.unmodifiableSet(routes);
+    }
+
+    @Override
+    public Set<Relationship> getRelationships() {
+        return relationships; // the unmodifiable set built in init() (REL_MATCH and REL_NO_MATCH)
+    }
+
+    @Override
+    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return properties; // the unmodifiable list built in init(); dynamic regex properties are not part of it
+    }
+
+    @Override
+    protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
+        return new PropertyDescriptor.Builder()
+                .name(propertyDescriptorName) // onTrigger uses this name as the key of the attribute it writes
+                .expressionLanguageSupported(false)
+                .addValidator(StandardValidators.createRegexValidator(1, 1, true)) // presumably min/max capturing groups = 1 - confirm against StandardValidators
+                .required(false)
+                .dynamic(true) // onTrigger compiles only the properties whose isDynamic() is true
+                .build();
+    }
+
+    /**
+     * Takes a batch from session.get(50), buffers at most 'Maximum Buffer Size' bytes of each FlowFile and runs
+     * every dynamic-property regex against the decoded text.  A non-blank capture group 1 of the first match is
+     * stored as an attribute named after the property; FlowFiles with results go to 'matched', the rest to 'unmatched'.
+     */
+    @Override
+    public void onTrigger(final ProcessContext context, final ProcessSession session) {
+        final List<FlowFile> flowFileBatch = session.get(50);
+        if (flowFileBatch.isEmpty()) {
+            return;
+        }
+        final ProcessorLog logger = getLogger();
+
+        // Compile the Regular Expressions; the flags depend only on the context, so they are computed once
+        final int flags = getCompileFlags(context);
+        final Map<String, Matcher> regexMap = new HashMap<>();
+        for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
+            if (!entry.getKey().isDynamic()) {
+                continue;
+            }
+            final Matcher matcher = Pattern.compile(entry.getValue(), flags).matcher("");
+            regexMap.put(entry.getKey().getName(), matcher);
+        }
+
+        final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue());
+        final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
+
+        for (FlowFile flowFile : flowFileBatch) {
+            final Map<String, String> regexResults = new HashMap<>();
+            final byte[] buffer = new byte[maxBufferSize];
+
+            session.read(flowFile, new InputStreamCallback() {
+                @Override
+                public void process(InputStream in) throws IOException {
+                    StreamUtils.fillBuffer(in, buffer, false);
+                }
+            });
+
+            // Clamp while still a long: narrowing getSize() to int before the min wraps for content over 2 GB
+            // and could pass a negative or truncated length to the String constructor below.
+            final int flowFileSize = (int) Math.min(flowFile.getSize(), (long) maxBufferSize);
+            final String contentString = new String(buffer, 0, flowFileSize, charset);
+
+            for (final Map.Entry<String, Matcher> entry : regexMap.entrySet()) {
+                final Matcher matcher = entry.getValue();
+                matcher.reset(contentString);
+                // NOTE(review): with LITERAL enabled the pattern presumably has no groups, so group(1) would throw - confirm
+                if (matcher.find()) {
+                    final String group = matcher.group(1);
+                    if (!StringUtils.isBlank(group)) {
+                        regexResults.put(entry.getKey(), group);
+                    }
+                }
+            }
+
+            if (!regexResults.isEmpty()) {
+                flowFile = session.putAllAttributes(flowFile, regexResults);
+                session.getProvenanceReporter().modifyAttributes(flowFile);
+                session.transfer(flowFile, REL_MATCH);
+                logger.info("Matched {} Regular Expressions and added attributes to FlowFile {}", new Object[]{regexResults.size(), flowFile});
+            } else {
+                session.transfer(flowFile, REL_NO_MATCH);
+                logger.info("Did not match any Regular Expressions for  FlowFile {}", new Object[]{flowFile});
+            }
+        } // end flowFileLoop
+    }
+
+    /** ORs together the java.util.regex.Pattern flag of each boolean option property that evaluates to true. */
+    int getCompileFlags(ProcessContext context) {
+        final PropertyDescriptor[] options = {UNIX_LINES, CASE_INSENSITIVE, COMMENTS, MULTILINE, LITERAL, DOTALL, UNICODE_CASE, CANON_EQ, UNICODE_CHARACTER_CLASS};
+        final int[] patternFlags = {Pattern.UNIX_LINES, Pattern.CASE_INSENSITIVE, Pattern.COMMENTS, Pattern.MULTILINE, Pattern.LITERAL, Pattern.DOTALL, Pattern.UNICODE_CASE, Pattern.CANON_EQ, Pattern.UNICODE_CHARACTER_CLASS};
+        int flags = 0;
+        for (int i = 0; i < options.length; i++) {
+            if (context.getProperty(options[i]).asBoolean()) {
+                flags |= patternFlags[i];
+            }
+        }
+        return flags;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/ad18853b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
----------------------------------------------------------------------
diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
index f81ccec..7fbd781 100644
--- a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
+++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
@@ -25,6 +25,7 @@ org.apache.nifi.processors.standard.EvaluateXPath
 org.apache.nifi.processors.standard.EvaluateXQuery
 org.apache.nifi.processors.standard.ExecuteStreamCommand
 org.apache.nifi.processors.standard.ExecuteProcess
+org.apache.nifi.processors.standard.ExtractText
 org.apache.nifi.processors.standard.GenerateFlowFile
 org.apache.nifi.processors.standard.GetFile
 org.apache.nifi.processors.standard.GetFTP

http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/ad18853b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateRegularExpression.java
----------------------------------------------------------------------
diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateRegularExpression.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateRegularExpression.java
deleted file mode 100644
index c1e5b3c..0000000
--- a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateRegularExpression.java
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nifi.processors.standard;
-
-import org.apache.nifi.processors.standard.EvaluateRegularExpression;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.UnsupportedEncodingException;
-import java.util.Set;
-import java.util.regex.Pattern;
-
-import org.apache.nifi.processor.Relationship;
-import org.apache.nifi.util.MockFlowFile;
-import org.apache.nifi.util.TestRunner;
-import org.apache.nifi.util.TestRunners;
-
-import org.junit.Test;
-
-public class TestEvaluateRegularExpression {
-
-    final String SAMPLE_STRING = "foo\r\nbar1\r\nbar2\r\nbar3\r\nhello\r\nworld\r\n";
-
-    @Test
-    public void testProcessor() throws Exception {
-
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-
-        testRunner.setProperty("regex.result1", "(?s)(.*)");
-        testRunner.setProperty("regex.result2", "(?s).*(bar1).*");
-        testRunner.setProperty("regex.result3", "(?s).*?(bar\\d).*");	// reluctant gets first
-        testRunner.setProperty("regex.result4", "(?s).*?(?:bar\\d).*?(bar\\d).*"); // reluctant w/ repeated pattern gets second
-        testRunner.setProperty("regex.result5", "(?s).*(bar\\d).*");	// greedy gets last
-        testRunner.setProperty("regex.result6", "(?s)^(.*)$");
-        testRunner.setProperty("regex.result7", "(?s)(XXX)");
-
-        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
-        testRunner.run();
-
-        testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1);
-        final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0);
-        out.assertAttributeEquals("regex.result1", SAMPLE_STRING);
-        out.assertAttributeEquals("regex.result2", "bar1");
-        out.assertAttributeEquals("regex.result3", "bar1");
-        out.assertAttributeEquals("regex.result4", "bar2");
-        out.assertAttributeEquals("regex.result5", "bar3");
-        out.assertAttributeEquals("regex.result6", SAMPLE_STRING);
-        out.assertAttributeEquals("regex.result7", null);
-    }
-
-    @Test
-    public void testProcessorWithDotall() throws Exception {
-
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-
-        testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true");
-
-        testRunner.setProperty("regex.result1", "(.*)");
-        testRunner.setProperty("regex.result2", ".*(bar1).*");
-        testRunner.setProperty("regex.result3", ".*?(bar\\d).*");	// reluctant gets first
-        testRunner.setProperty("regex.result4", ".*?(?:bar\\d).*?(bar\\d).*"); // reluctant w/ repeated pattern gets second
-        testRunner.setProperty("regex.result5", ".*(bar\\d).*");	// greedy gets last
-        testRunner.setProperty("regex.result6", "^(.*)$");
-        testRunner.setProperty("regex.result7", "^(XXX)$");
-
-        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
-        testRunner.run();
-
-        testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1);
-        final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0);
-        out.assertAttributeEquals("regex.result1", SAMPLE_STRING);
-        out.assertAttributeEquals("regex.result2", "bar1");
-        out.assertAttributeEquals("regex.result3", "bar1");
-        out.assertAttributeEquals("regex.result4", "bar2");
-        out.assertAttributeEquals("regex.result5", "bar3");
-        out.assertAttributeEquals("regex.result6", SAMPLE_STRING);
-        out.assertAttributeEquals("regex.result7", null);
-
-    }
-
-    @Test
-    public void testProcessorWithMultiline() throws Exception {
-
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-
-        testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true");
-
-        testRunner.setProperty("regex.result1", "(.*)");
-        testRunner.setProperty("regex.result2", "(bar1)");
-        testRunner.setProperty("regex.result3", ".*?(bar\\d).*");
-        testRunner.setProperty("regex.result4", ".*?(?:bar\\d).*?(bar\\d).*");
-        testRunner.setProperty("regex.result4b", "bar\\d\\r\\n(bar\\d)");
-        testRunner.setProperty("regex.result5", ".*(bar\\d).*");
-        testRunner.setProperty("regex.result5b", "(?:bar\\d\\r?\\n)*(bar\\d)");
-        testRunner.setProperty("regex.result6", "^(.*)$");
-        testRunner.setProperty("regex.result7", "^(XXX)$");
-
-        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
-        testRunner.run();
-
-        testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1);
-        final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0);
-        out.assertAttributeEquals("regex.result1", "foo"); 	// matches everything on the first line
-        out.assertAttributeEquals("regex.result2", "bar1");
-        out.assertAttributeEquals("regex.result3", "bar1");
-        out.assertAttributeEquals("regex.result4", null);	// null because no line has two bar's
-        out.assertAttributeEquals("regex.result4b", "bar2"); // included newlines in regex
-        out.assertAttributeEquals("regex.result5", "bar1");	//still gets first because no lines with multiple bar's 
-        out.assertAttributeEquals("regex.result5b", "bar3");// included newlines in regex
-        out.assertAttributeEquals("regex.result6", "foo");	// matches all of first line
-        out.assertAttributeEquals("regex.result7", null);	// no match
-    }
-
-    @Test
-    public void testProcessorWithMultilineAndDotall() throws Exception {
-
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-
-        testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true");
-        testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true");
-
-        testRunner.setProperty("regex.result1", "(.*)");
-        testRunner.setProperty("regex.result2", "(bar1)");
-        testRunner.setProperty("regex.result3", ".*?(bar\\d).*");
-        testRunner.setProperty("regex.result4", ".*?(?:bar\\d).*?(bar\\d).*");
-        testRunner.setProperty("regex.result4b", "bar\\d\\r\\n(bar\\d)");
-        testRunner.setProperty("regex.result5", ".*(bar\\d).*");
-        testRunner.setProperty("regex.result5b", "(?:bar\\d\\r?\\n)*(bar\\d)");
-        testRunner.setProperty("regex.result6", "^(.*)$");
-        testRunner.setProperty("regex.result7", "^(XXX)$");
-
-        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
-        testRunner.run();
-
-        testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1);
-        final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0);
-
-        out.assertAttributeEquals("regex.result1", SAMPLE_STRING);
-        out.assertAttributeEquals("regex.result2", "bar1");
-        out.assertAttributeEquals("regex.result3", "bar1");
-        out.assertAttributeEquals("regex.result4", "bar2");
-        out.assertAttributeEquals("regex.result4b", "bar2");
-        out.assertAttributeEquals("regex.result5", "bar3");
-        out.assertAttributeEquals("regex.result5b", "bar3");
-        out.assertAttributeEquals("regex.result6", SAMPLE_STRING);
-        out.assertAttributeEquals("regex.result7", null);
-    }
-
-    @Test
-    public void testProcessorWithNoMatches() throws Exception {
-
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-
-        testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true");
-        testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true");
-
-        testRunner.setProperty("regex.result2", "(bar1)");
-        testRunner.setProperty("regex.result3", ".*?(bar\\d).*");
-        testRunner.setProperty("regex.result4", ".*?(?:bar\\d).*?(bar\\d).*");
-        testRunner.setProperty("regex.result4b", "bar\\d\\r\\n(bar\\d)");
-        testRunner.setProperty("regex.result5", ".*(bar\\d).*");
-        testRunner.setProperty("regex.result5b", "(?:bar\\d\\r?\\n)*(bar\\d)");
-        testRunner.setProperty("regex.result7", "^(XXX)$");
-
-        testRunner.enqueue("YYY".getBytes("UTF-8"));
-        testRunner.run();
-
-        testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_NO_MATCH, 1);
-        final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_NO_MATCH).get(0);
-
-        out.assertAttributeEquals("regex.result1", null);
-        out.assertAttributeEquals("regex.result2", null);
-        out.assertAttributeEquals("regex.result3", null);
-        out.assertAttributeEquals("regex.result4", null);
-        out.assertAttributeEquals("regex.result4b", null);
-        out.assertAttributeEquals("regex.result5", null);
-        out.assertAttributeEquals("regex.result5b", null);
-        out.assertAttributeEquals("regex.result6", null);
-        out.assertAttributeEquals("regex.result7", null);
-    }
-
-    @Test(expected = java.lang.AssertionError.class)
-    public void testNoCaptureGroups() throws UnsupportedEncodingException {
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-        testRunner.setProperty("regex.result1", ".*");
-        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
-        testRunner.run();
-    }
-
-    @Test
-    public void testNoFlowFile() throws UnsupportedEncodingException {
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-        testRunner.run();
-        testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 0);
-
-    }
-
-    @Test(expected = java.lang.AssertionError.class)
-    public void testTooManyCaptureGroups() throws UnsupportedEncodingException {
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-        testRunner.setProperty("regex.result1", "(.)(.)");
-        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
-        testRunner.run();
-    }
-
-    @Test
-    public void testMatchOutsideBuffer() throws Exception {
-        final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression());
-
-        testRunner.setProperty(EvaluateRegularExpression.MAX_BUFFER_SIZE, "3 B");//only read the first 3 chars ("foo")
-
-        testRunner.setProperty("regex.result1", "(foo)");
-        testRunner.setProperty("regex.result2", "(world)");
-
-        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
-        testRunner.run();
-
-        testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1);
-        final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0);
-
-        out.assertAttributeEquals("regex.result1", "foo");
-        out.assertAttributeEquals("regex.result2", null); 	// null because outsk
-    }
-
-    @Test
-    public void testGetCompileFlags() {
-
-        final EvaluateRegularExpression processor = new EvaluateRegularExpression();
-        TestRunner testRunner;
-        int flags;
-
-        // NONE
-        testRunner = TestRunners.newTestRunner(processor);
-        flags = processor.getCompileFlags(testRunner.getProcessContext());
-        assertEquals(0, flags);
-
-        // UNIX_LINES
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.UNIX_LINES, "true");
-        assertEquals(Pattern.UNIX_LINES, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // CASE_INSENSITIVE
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.CASE_INSENSITIVE, "true");
-        assertEquals(Pattern.CASE_INSENSITIVE, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // COMMENTS
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.COMMENTS, "true");
-        assertEquals(Pattern.COMMENTS, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // MULTILINE
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true");
-        assertEquals(Pattern.MULTILINE, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // LITERAL
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.LITERAL, "true");
-        assertEquals(Pattern.LITERAL, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // DOTALL
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true");
-        assertEquals(Pattern.DOTALL, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // UNICODE_CASE
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.UNICODE_CASE, "true");
-        assertEquals(Pattern.UNICODE_CASE, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // CANON_EQ
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.CANON_EQ, "true");
-        assertEquals(Pattern.CANON_EQ, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // UNICODE_CHARACTER_CLASS
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.UNICODE_CHARACTER_CLASS, "true");
-        assertEquals(Pattern.UNICODE_CHARACTER_CLASS, processor.getCompileFlags(testRunner.getProcessContext()));
-
-        // DOTALL and MULTILINE
-        testRunner = TestRunners.newTestRunner(processor);
-        testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true");
-        testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true");
-        assertEquals(Pattern.DOTALL | Pattern.MULTILINE, processor.getCompileFlags(testRunner.getProcessContext()));
-    }
-
-    @Test
-    public void testGetRelationShips() throws Exception {
-
-        final EvaluateRegularExpression processor = new EvaluateRegularExpression();
-        final TestRunner testRunner = TestRunners.newTestRunner(processor);
-
-//		testRunner.setProperty("regex.result1", "(.*)");
-        testRunner.enqueue("foo".getBytes("UTF-8"));
-        testRunner.run();
-
-        Set<Relationship> relationships = processor.getRelationships();
-        assertTrue(relationships.contains(EvaluateRegularExpression.REL_MATCH));
-        assertTrue(relationships.contains(EvaluateRegularExpression.REL_NO_MATCH));
-        assertEquals(2, relationships.size());
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/ad18853b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
----------------------------------------------------------------------
diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
new file mode 100644
index 0000000..355d255
--- /dev/null
+++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.standard;
+
+import org.apache.nifi.processors.standard.ExtractText;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+
+import org.junit.Test;
+
+public class TestExtractText {
+
+    final String SAMPLE_STRING = "foo\r\nbar1\r\nbar2\r\nbar3\r\nhello\r\nworld\r\n";
+
+    @Test
+    public void testProcessor() throws Exception {
+
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+
+        testRunner.setProperty("regex.result1", "(?s)(.*)");
+        testRunner.setProperty("regex.result2", "(?s).*(bar1).*");
+        testRunner.setProperty("regex.result3", "(?s).*?(bar\\d).*");	// reluctant gets first
+        testRunner.setProperty("regex.result4", "(?s).*?(?:bar\\d).*?(bar\\d).*"); // reluctant w/ repeated pattern gets second
+        testRunner.setProperty("regex.result5", "(?s).*(bar\\d).*");	// greedy gets last
+        testRunner.setProperty("regex.result6", "(?s)^(.*)$");
+        testRunner.setProperty("regex.result7", "(?s)(XXX)");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+        out.assertAttributeEquals("regex.result1", SAMPLE_STRING);
+        out.assertAttributeEquals("regex.result2", "bar1");
+        out.assertAttributeEquals("regex.result3", "bar1");
+        out.assertAttributeEquals("regex.result4", "bar2");
+        out.assertAttributeEquals("regex.result5", "bar3");
+        out.assertAttributeEquals("regex.result6", SAMPLE_STRING);
+        out.assertAttributeEquals("regex.result7", null);
+    }
+
+    @Test
+    public void testProcessorWithDotall() throws Exception {
+
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+
+        testRunner.setProperty(ExtractText.DOTALL, "true");
+
+        testRunner.setProperty("regex.result1", "(.*)");
+        testRunner.setProperty("regex.result2", ".*(bar1).*");
+        testRunner.setProperty("regex.result3", ".*?(bar\\d).*");	// reluctant gets first
+        testRunner.setProperty("regex.result4", ".*?(?:bar\\d).*?(bar\\d).*"); // reluctant w/ repeated pattern gets second
+        testRunner.setProperty("regex.result5", ".*(bar\\d).*");	// greedy gets last
+        testRunner.setProperty("regex.result6", "^(.*)$");
+        testRunner.setProperty("regex.result7", "^(XXX)$");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+        out.assertAttributeEquals("regex.result1", SAMPLE_STRING);
+        out.assertAttributeEquals("regex.result2", "bar1");
+        out.assertAttributeEquals("regex.result3", "bar1");
+        out.assertAttributeEquals("regex.result4", "bar2");
+        out.assertAttributeEquals("regex.result5", "bar3");
+        out.assertAttributeEquals("regex.result6", SAMPLE_STRING);
+        out.assertAttributeEquals("regex.result7", null);
+
+    }
+
+    @Test
+    public void testProcessorWithMultiline() throws Exception {
+
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+
+        testRunner.setProperty(ExtractText.MULTILINE, "true");
+
+        testRunner.setProperty("regex.result1", "(.*)");
+        testRunner.setProperty("regex.result2", "(bar1)");
+        testRunner.setProperty("regex.result3", ".*?(bar\\d).*");
+        testRunner.setProperty("regex.result4", ".*?(?:bar\\d).*?(bar\\d).*");
+        testRunner.setProperty("regex.result4b", "bar\\d\\r\\n(bar\\d)");
+        testRunner.setProperty("regex.result5", ".*(bar\\d).*");
+        testRunner.setProperty("regex.result5b", "(?:bar\\d\\r?\\n)*(bar\\d)");
+        testRunner.setProperty("regex.result6", "^(.*)$");
+        testRunner.setProperty("regex.result7", "^(XXX)$");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+        out.assertAttributeEquals("regex.result1", "foo"); 	// matches everything on the first line
+        out.assertAttributeEquals("regex.result2", "bar1");
+        out.assertAttributeEquals("regex.result3", "bar1");
+        out.assertAttributeEquals("regex.result4", null);	// null because no line has two bar's
+        out.assertAttributeEquals("regex.result4b", "bar2"); // included newlines in regex
+        out.assertAttributeEquals("regex.result5", "bar1");	//still gets first because no lines with multiple bar's 
+        out.assertAttributeEquals("regex.result5b", "bar3");// included newlines in regex
+        out.assertAttributeEquals("regex.result6", "foo");	// matches all of first line
+        out.assertAttributeEquals("regex.result7", null);	// no match
+    }
+
+    @Test
+    public void testProcessorWithMultilineAndDotall() throws Exception {
+
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+
+        testRunner.setProperty(ExtractText.MULTILINE, "true");
+        testRunner.setProperty(ExtractText.DOTALL, "true");
+
+        testRunner.setProperty("regex.result1", "(.*)");
+        testRunner.setProperty("regex.result2", "(bar1)");
+        testRunner.setProperty("regex.result3", ".*?(bar\\d).*");
+        testRunner.setProperty("regex.result4", ".*?(?:bar\\d).*?(bar\\d).*");
+        testRunner.setProperty("regex.result4b", "bar\\d\\r\\n(bar\\d)");
+        testRunner.setProperty("regex.result5", ".*(bar\\d).*");
+        testRunner.setProperty("regex.result5b", "(?:bar\\d\\r?\\n)*(bar\\d)");
+        testRunner.setProperty("regex.result6", "^(.*)$");
+        testRunner.setProperty("regex.result7", "^(XXX)$");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+
+        out.assertAttributeEquals("regex.result1", SAMPLE_STRING);
+        out.assertAttributeEquals("regex.result2", "bar1");
+        out.assertAttributeEquals("regex.result3", "bar1");
+        out.assertAttributeEquals("regex.result4", "bar2");
+        out.assertAttributeEquals("regex.result4b", "bar2");
+        out.assertAttributeEquals("regex.result5", "bar3");
+        out.assertAttributeEquals("regex.result5b", "bar3");
+        out.assertAttributeEquals("regex.result6", SAMPLE_STRING);
+        out.assertAttributeEquals("regex.result7", null);
+    }
+
+    @Test
+    public void testProcessorWithNoMatches() throws Exception {
+
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+
+        testRunner.setProperty(ExtractText.MULTILINE, "true");
+        testRunner.setProperty(ExtractText.DOTALL, "true");
+
+        testRunner.setProperty("regex.result2", "(bar1)");
+        testRunner.setProperty("regex.result3", ".*?(bar\\d).*");
+        testRunner.setProperty("regex.result4", ".*?(?:bar\\d).*?(bar\\d).*");
+        testRunner.setProperty("regex.result4b", "bar\\d\\r\\n(bar\\d)");
+        testRunner.setProperty("regex.result5", ".*(bar\\d).*");
+        testRunner.setProperty("regex.result5b", "(?:bar\\d\\r?\\n)*(bar\\d)");
+        testRunner.setProperty("regex.result7", "^(XXX)$");
+
+        testRunner.enqueue("YYY".getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_NO_MATCH, 1);
+        final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_NO_MATCH).get(0);
+
+        out.assertAttributeEquals("regex.result1", null);
+        out.assertAttributeEquals("regex.result2", null);
+        out.assertAttributeEquals("regex.result3", null);
+        out.assertAttributeEquals("regex.result4", null);
+        out.assertAttributeEquals("regex.result4b", null);
+        out.assertAttributeEquals("regex.result5", null);
+        out.assertAttributeEquals("regex.result5b", null);
+        out.assertAttributeEquals("regex.result6", null);
+        out.assertAttributeEquals("regex.result7", null);
+    }
+
+    @Test(expected = java.lang.AssertionError.class)
+    public void testNoCaptureGroups() throws UnsupportedEncodingException {
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+        testRunner.setProperty("regex.result1", ".*");
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+    }
+
+    @Test
+    public void testNoFlowFile() throws UnsupportedEncodingException {
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+        testRunner.run();
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 0);
+
+    }
+
+    @Test(expected = java.lang.AssertionError.class)
+    public void testTooManyCaptureGroups() throws UnsupportedEncodingException {
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+        testRunner.setProperty("regex.result1", "(.)(.)");
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+    }
+
+    @Test
+    public void testMatchOutsideBuffer() throws Exception {
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+
+        testRunner.setProperty(ExtractText.MAX_BUFFER_SIZE, "3 B");//only read the first 3 chars ("foo")
+
+        testRunner.setProperty("regex.result1", "(foo)");
+        testRunner.setProperty("regex.result2", "(world)");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+
+        out.assertAttributeEquals("regex.result1", "foo");
+        out.assertAttributeEquals("regex.result2", null); 	// null because "world" lies outside the 3-byte buffer
+    }
+
+    @Test
+    public void testGetCompileFlags() {
+
+        final ExtractText processor = new ExtractText();
+        TestRunner testRunner;
+        int flags;
+
+        // NONE
+        testRunner = TestRunners.newTestRunner(processor);
+        flags = processor.getCompileFlags(testRunner.getProcessContext());
+        assertEquals(0, flags);
+
+        // UNIX_LINES
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.UNIX_LINES, "true");
+        assertEquals(Pattern.UNIX_LINES, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // CASE_INSENSITIVE
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.CASE_INSENSITIVE, "true");
+        assertEquals(Pattern.CASE_INSENSITIVE, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // COMMENTS
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.COMMENTS, "true");
+        assertEquals(Pattern.COMMENTS, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // MULTILINE
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.MULTILINE, "true");
+        assertEquals(Pattern.MULTILINE, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // LITERAL
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.LITERAL, "true");
+        assertEquals(Pattern.LITERAL, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // DOTALL
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.DOTALL, "true");
+        assertEquals(Pattern.DOTALL, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // UNICODE_CASE
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.UNICODE_CASE, "true");
+        assertEquals(Pattern.UNICODE_CASE, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // CANON_EQ
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.CANON_EQ, "true");
+        assertEquals(Pattern.CANON_EQ, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // UNICODE_CHARACTER_CLASS
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.UNICODE_CHARACTER_CLASS, "true");
+        assertEquals(Pattern.UNICODE_CHARACTER_CLASS, processor.getCompileFlags(testRunner.getProcessContext()));
+
+        // DOTALL and MULTILINE
+        testRunner = TestRunners.newTestRunner(processor);
+        testRunner.setProperty(ExtractText.DOTALL, "true");
+        testRunner.setProperty(ExtractText.MULTILINE, "true");
+        assertEquals(Pattern.DOTALL | Pattern.MULTILINE, processor.getCompileFlags(testRunner.getProcessContext()));
+    }
+
+    @Test
+    public void testGetRelationShips() throws Exception {
+
+        final ExtractText processor = new ExtractText();
+        final TestRunner testRunner = TestRunners.newTestRunner(processor);
+
+//		testRunner.setProperty("regex.result1", "(.*)");
+        testRunner.enqueue("foo".getBytes("UTF-8"));
+        testRunner.run();
+
+        Set<Relationship> relationships = processor.getRelationships();
+        assertTrue(relationships.contains(ExtractText.REL_MATCH));
+        assertTrue(relationships.contains(ExtractText.REL_NO_MATCH));
+        assertEquals(2, relationships.size());
+    }
+
+}


Mime
View raw message