nifi-commits mailing list archives

From ijokaruma...@apache.org
Subject nifi git commit: NIFI-4465 ConvertExcelToCSV Data Formatting and Delimiters
Date Tue, 17 Oct 2017 06:04:14 GMT
Repository: nifi
Updated Branches:
  refs/heads/master b950eed1a -> fd00df3d2


NIFI-4465 ConvertExcelToCSV Data Formatting and Delimiters

This closes #2194.

Signed-off-by: Koji Kawamura <ijokarumawak@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/fd00df3d
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/fd00df3d
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/fd00df3d

Branch: refs/heads/master
Commit: fd00df3d2f593b6da6c7498fa66ec6917e1639e0
Parents: b950eed
Author: patricker <patricker@gmail.com>
Authored: Thu Oct 5 13:01:47 2017 +0800
Committer: Koji Kawamura <ijokarumawak@apache.org>
Committed: Tue Oct 17 14:56:49 2017 +0900

----------------------------------------------------------------------
 .../nifi-standard-record-utils/pom.xml          |   5 +
 .../main/java/org/apache/nifi/csv/CSVUtils.java | 244 +++++++++
 .../java/org/apache/nifi/csv/CSVValidators.java | 109 ++++
 .../nifi-poi-bundle/nifi-poi-processors/pom.xml |   7 +-
 .../poi/ConvertExcelToCSVProcessor.java         | 504 ++++++++++++-------
 .../poi/ConvertExcelToCSVProcessorTest.java     | 156 +++++-
 .../src/test/resources/dataformatting.xlsx      | Bin 0 -> 10765 bytes
 .../main/java/org/apache/nifi/csv/CSVUtils.java | 244 ---------
 .../java/org/apache/nifi/csv/CSVValidators.java | 109 ----
 9 files changed, 834 insertions(+), 544 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/pom.xml b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/pom.xml
index a6ed07e..6721c98 100644
--- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/pom.xml
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/pom.xml
@@ -49,5 +49,10 @@
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-record</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-csv</artifactId>
+            <version>1.4</version>
+        </dependency>
     </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVUtils.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVUtils.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVUtils.java
new file mode 100644
index 0000000..bc074b3
--- /dev/null
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVUtils.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.csv;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.QuoteMode;
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.PropertyValue;
+import org.apache.nifi.context.PropertyContext;
+import org.apache.nifi.processor.util.StandardValidators;
+
+public class CSVUtils {
+
+    public static final AllowableValue CUSTOM = new AllowableValue("custom", "Custom Format",
+        "The format of the CSV is configured by using the properties of this Controller Service, such as Value Separator");
+    public static final AllowableValue RFC_4180 = new AllowableValue("rfc-4180", "RFC 4180", "CSV data follows the RFC 4180 Specification defined at https://tools.ietf.org/html/rfc4180");
+    public static final AllowableValue EXCEL = new AllowableValue("excel", "Microsoft Excel", "CSV data follows the format used by Microsoft Excel");
+    public static final AllowableValue TDF = new AllowableValue("tdf", "Tab-Delimited", "CSV data is Tab-Delimited instead of Comma Delimited");
+    public static final AllowableValue INFORMIX_UNLOAD = new AllowableValue("informix-unload", "Informix Unload", "The format used by Informix when issuing the UNLOAD TO file_name command");
+    public static final AllowableValue INFORMIX_UNLOAD_CSV = new AllowableValue("informix-unload-csv", "Informix Unload Escape Disabled",
+        "The format used by Informix when issuing the UNLOAD TO file_name command with escaping disabled");
+    public static final AllowableValue MYSQL = new AllowableValue("mysql", "MySQL Format", "CSV data follows the format used by MySQL");
+
+    public static final PropertyDescriptor CSV_FORMAT = new PropertyDescriptor.Builder()
+        .name("CSV Format")
+        .description("Specifies which \"format\" the CSV data is in, or specifies if custom formatting should be used.")
+        .expressionLanguageSupported(false)
+        .allowableValues(CUSTOM, RFC_4180, EXCEL, TDF, MYSQL, INFORMIX_UNLOAD, INFORMIX_UNLOAD_CSV)
+        .defaultValue(CUSTOM.getValue())
+        .required(true)
+        .build();
+    public static final PropertyDescriptor VALUE_SEPARATOR = new PropertyDescriptor.Builder()
+        .name("Value Separator")
+        .description("The character that is used to separate values/fields in a CSV Record")
+        .addValidator(CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR)
+        .expressionLanguageSupported(false)
+        .defaultValue(",")
+        .required(true)
+        .build();
+    public static final PropertyDescriptor QUOTE_CHAR = new PropertyDescriptor.Builder()
+        .name("Quote Character")
+        .description("The character that is used to quote values so that escape characters do not have to be used")
+        .addValidator(new CSVValidators.SingleCharacterValidator())
+        .expressionLanguageSupported(false)
+        .defaultValue("\"")
+        .required(true)
+        .build();
+    public static final PropertyDescriptor FIRST_LINE_IS_HEADER = new PropertyDescriptor.Builder()
+        .name("Skip Header Line")
+        .displayName("Treat First Line as Header")
+        .description("Specifies whether or not the first line of CSV should be considered a Header or should be considered a record. If the Schema Access Strategy "
+            + "indicates that the columns must be defined in the header, then this property will be ignored, since the header must always be "
+            + "present and won't be processed as a Record. Otherwise, if 'true', then the first line of CSV data will not be processed as a record and if 'false',"
+            + "then the first line will be interpreted as a record.")
+        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+        .expressionLanguageSupported(false)
+        .allowableValues("true", "false")
+        .defaultValue("false")
+        .required(true)
+        .build();
+    public static final PropertyDescriptor IGNORE_CSV_HEADER = new PropertyDescriptor.Builder()
+        .name("ignore-csv-header")
+        .displayName("Ignore CSV Header Column Names")
+        .description("If the first line of a CSV is a header, and the configured schema does not match the fields named in the header line, this controls how "
+            + "the Reader will interpret the fields. If this property is true, then the field names mapped to each column are driven only by the configured schema and "
+            + "any fields not in the schema will be ignored. If this property is false, then the field names found in the CSV Header will be used as the names of the "
+            + "fields.")
+        .expressionLanguageSupported(false)
+        .allowableValues("true", "false")
+        .defaultValue("false")
+        .required(false)
+        .build();
+    public static final PropertyDescriptor COMMENT_MARKER = new PropertyDescriptor.Builder()
+        .name("Comment Marker")
+        .description("The character that is used to denote the start of a comment. Any line that begins with this comment will be ignored.")
+        .addValidator(new CSVValidators.SingleCharacterValidator())
+        .expressionLanguageSupported(false)
+        .required(false)
+        .build();
+    public static final PropertyDescriptor ESCAPE_CHAR = new PropertyDescriptor.Builder()
+        .name("Escape Character")
+        .description("The character that is used to escape characters that would otherwise have a specific meaning to the CSV Parser.")
+        .addValidator(new CSVValidators.SingleCharacterValidator())
+        .expressionLanguageSupported(false)
+        .defaultValue("\\")
+        .required(true)
+        .build();
+    public static final PropertyDescriptor NULL_STRING = new PropertyDescriptor.Builder()
+        .name("Null String")
+        .description("Specifies a String that, if present as a value in the CSV, should be considered a null field instead of using the literal value.")
+        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+        .expressionLanguageSupported(false)
+        .required(false)
+        .build();
+    public static final PropertyDescriptor TRIM_FIELDS = new PropertyDescriptor.Builder()
+        .name("Trim Fields")
+        .description("Whether or not white space should be removed from the beginning and end of fields")
+        .expressionLanguageSupported(false)
+        .allowableValues("true", "false")
+        .defaultValue("true")
+        .required(true)
+        .build();
+
+    // CSV Format fields for writers only
+    public static final AllowableValue QUOTE_ALL = new AllowableValue("ALL", "Quote All Values", "All values will be quoted using the configured quote character.");
+    public static final AllowableValue QUOTE_MINIMAL = new AllowableValue("MINIMAL", "Quote Minimal",
+        "Values will be quoted only if they are contain special characters such as newline characters or field separators.");
+    public static final AllowableValue QUOTE_NON_NUMERIC = new AllowableValue("NON_NUMERIC", "Quote Non-Numeric Values", "Values will be quoted unless the value is a number.");
+    public static final AllowableValue QUOTE_NONE = new AllowableValue("NONE", "Do Not Quote Values",
+        "Values will not be quoted. Instead, all special characters will be escaped using the configured escape character.");
+
+    public static final PropertyDescriptor QUOTE_MODE = new PropertyDescriptor.Builder()
+        .name("Quote Mode")
+        .description("Specifies how fields should be quoted when they are written")
+        .expressionLanguageSupported(false)
+        .allowableValues(QUOTE_ALL, QUOTE_MINIMAL, QUOTE_NON_NUMERIC, QUOTE_NONE)
+        .defaultValue(QUOTE_MINIMAL.getValue())
+        .required(true)
+        .build();
+    public static final PropertyDescriptor TRAILING_DELIMITER = new PropertyDescriptor.Builder()
+        .name("Include Trailing Delimiter")
+        .description("If true, a trailing delimiter will be added to each CSV Record that is written. If false, the trailing delimiter will be omitted.")
+        .expressionLanguageSupported(false)
+        .allowableValues("true", "false")
+        .defaultValue("false")
+        .required(true)
+        .build();
+    public static final PropertyDescriptor RECORD_SEPARATOR = new PropertyDescriptor.Builder()
+        .name("Record Separator")
+        .description("Specifies the characters to use in order to separate CSV Records")
+        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+        .expressionLanguageSupported(false)
+        .defaultValue("\\n")
+        .required(true)
+        .build();
+    public static final PropertyDescriptor INCLUDE_HEADER_LINE = new PropertyDescriptor.Builder()
+        .name("Include Header Line")
+        .description("Specifies whether or not the CSV column names should be written out as the first line.")
+        .allowableValues("true", "false")
+        .defaultValue("true")
+        .required(true)
+        .build();
+
+    public static CSVFormat createCSVFormat(final PropertyContext context) {
+        final String formatName = context.getProperty(CSV_FORMAT).getValue();
+        if (formatName.equalsIgnoreCase(CUSTOM.getValue())) {
+            return buildCustomFormat(context);
+        }
+        if (formatName.equalsIgnoreCase(RFC_4180.getValue())) {
+            return CSVFormat.RFC4180;
+        } else if (formatName.equalsIgnoreCase(EXCEL.getValue())) {
+            return CSVFormat.EXCEL;
+        } else if (formatName.equalsIgnoreCase(TDF.getValue())) {
+            return CSVFormat.TDF;
+        } else if (formatName.equalsIgnoreCase(MYSQL.getValue())) {
+            return CSVFormat.MYSQL;
+        } else if (formatName.equalsIgnoreCase(INFORMIX_UNLOAD.getValue())) {
+            return CSVFormat.INFORMIX_UNLOAD;
+        } else if (formatName.equalsIgnoreCase(INFORMIX_UNLOAD_CSV.getValue())) {
+            return CSVFormat.INFORMIX_UNLOAD_CSV;
+        } else {
+            return CSVFormat.DEFAULT;
+        }
+    }
+
+    private static char getUnescapedChar(final PropertyContext context, final PropertyDescriptor property) {
+        return StringEscapeUtils.unescapeJava(context.getProperty(property).getValue()).charAt(0);
+    }
+
+    private static char getChar(final PropertyContext context, final PropertyDescriptor property) {
+        return CSVUtils.unescape(context.getProperty(property).getValue()).charAt(0);
+    }
+
+    private static CSVFormat buildCustomFormat(final PropertyContext context) {
+        final char valueSeparator = getUnescapedChar(context, VALUE_SEPARATOR);
+        CSVFormat format = CSVFormat.newFormat(valueSeparator)
+            .withAllowMissingColumnNames()
+            .withIgnoreEmptyLines();
+
+        final PropertyValue skipHeaderPropertyValue = context.getProperty(FIRST_LINE_IS_HEADER);
+        if (skipHeaderPropertyValue.getValue() != null && skipHeaderPropertyValue.asBoolean()) {
+            format = format.withFirstRecordAsHeader();
+        }
+
+        format = format.withQuote(getChar(context, QUOTE_CHAR));
+        format = format.withEscape(getChar(context, ESCAPE_CHAR));
+        format = format.withTrim(context.getProperty(TRIM_FIELDS).asBoolean());
+
+        if (context.getProperty(COMMENT_MARKER).isSet()) {
+            format = format.withCommentMarker(getChar(context, COMMENT_MARKER));
+        }
+        if (context.getProperty(NULL_STRING).isSet()) {
+            format = format.withNullString(CSVUtils.unescape(context.getProperty(NULL_STRING).getValue()));
+        }
+
+        final PropertyValue quoteValue = context.getProperty(QUOTE_MODE);
+        if (quoteValue != null) {
+            final QuoteMode quoteMode = QuoteMode.valueOf(quoteValue.getValue());
+            format = format.withQuoteMode(quoteMode);
+        }
+
+        final PropertyValue trailingDelimiterValue = context.getProperty(TRAILING_DELIMITER);
+        if (trailingDelimiterValue != null) {
+            final boolean trailingDelimiter = trailingDelimiterValue.asBoolean();
+            format = format.withTrailingDelimiter(trailingDelimiter);
+        }
+
+        final PropertyValue recordSeparator = context.getProperty(RECORD_SEPARATOR);
+        if (recordSeparator != null) {
+            final String separator = unescape(recordSeparator.getValue());
+            format = format.withRecordSeparator(separator);
+        }
+
+        return format;
+    }
+
+
+    public static String unescape(final String input) {
+        if (input == null) {
+            return input;
+        }
+
+        return input.replace("\\t", "\t")
+            .replace("\\n", "\n")
+            .replace("\\r", "\r");
+    }
+}
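
For context, a minimal standalone sketch of the Commons CSV 1.4 calls that buildCustomFormat() chains together, followed by a CSVPrinter writing records the same way the new SheetToCSV handler below does. The property values and record contents here are illustrative (they loosely mirror the testQuoting expectations later in this commit), not values taken from the committed code.

import java.io.IOException;
import java.io.StringWriter;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.QuoteMode;

public class CsvFormatSketch {
    public static void main(String[] args) throws IOException {
        // Illustrative "custom format" settings, mirroring what buildCustomFormat() assembles.
        CSVFormat format = CSVFormat.newFormat(',')        // hypothetical Value Separator
            .withAllowMissingColumnNames()
            .withIgnoreEmptyLines()
            .withQuote('"')
            .withEscape('\\')
            .withTrim(true)
            .withQuoteMode(QuoteMode.MINIMAL)
            .withRecordSeparator("\n");

        StringWriter out = new StringWriter();
        try (CSVPrinter printer = new CSVPrinter(out, format)) {
            printer.printRecord("Numbers", "Timestamps", "Money");
            printer.printRecord("1,234.46", "1/1/17 12:00", "$   1,023.45");
        }
        // With QuOTE_MODE MINIMAL only the values containing the value separator
        // (the first and last fields of the second record) come out quoted.
        System.out.print(out);
    }
}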

http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVValidators.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVValidators.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVValidators.java
new file mode 100644
index 0000000..5979407
--- /dev/null
+++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVValidators.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.csv;
+
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.components.Validator;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public class CSVValidators {
+
+    public static class SingleCharacterValidator implements Validator {
+        private static final Set<String> illegalChars = new HashSet<>();
+
+        static {
+            illegalChars.add("\r");
+            illegalChars.add("\n");
+        }
+
+        @Override
+        public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
+
+            if (input == null) {
+                return new ValidationResult.Builder()
+                        .input(input)
+                        .subject(subject)
+                        .valid(false)
+                        .explanation("Input is null for this property")
+                        .build();
+            }
+
+            final String unescaped = CSVUtils.unescape(input);
+            if (unescaped.length() != 1) {
+                return new ValidationResult.Builder()
+                        .input(input)
+                        .subject(subject)
+                        .valid(false)
+                        .explanation("Value must be exactly 1 character but was " + input.length() + " in length")
+                        .build();
+            }
+
+            if (illegalChars.contains(unescaped)) {
+                return new ValidationResult.Builder()
+                        .input(input)
+                        .subject(subject)
+                        .valid(false)
+                        .explanation(input + " is not a valid character for this property")
+                        .build();
+            }
+
+            return new ValidationResult.Builder()
+                    .input(input)
+                    .subject(subject)
+                    .valid(true)
+                    .build();
+        }
+
+    }
+
+    public static final Validator UNESCAPED_SINGLE_CHAR_VALIDATOR = new Validator() {
+        @Override
+        public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
+
+            if (input == null) {
+                return new ValidationResult.Builder()
+                        .input(input)
+                        .subject(subject)
+                        .valid(false)
+                        .explanation("Input is null for this property")
+                        .build();
+            }
+
+            String unescapeString = unescapeString(input);
+
+            return new ValidationResult.Builder()
+                    .subject(subject)
+                    .input(unescapeString)
+                    .explanation("Only non-null single characters are supported")
+                    .valid((unescapeString.length() == 1 && unescapeString.charAt(0) != 0) || context.isExpressionLanguagePresent(input))
+                    .build();
+        }
+
+        private String unescapeString(String input) {
+            if (input != null && input.length() > 1) {
+                input = StringEscapeUtils.unescapeJava(input);
+            }
+            return input;
+        }
+    };
+
+}
\ No newline at end of file
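
A minimal sketch of the unescaping behaviour behind SingleCharacterValidator, using CSVUtils.unescape from the class added above (assumes nifi-standard-record-utils is on the classpath; the input values are illustrative):

import org.apache.nifi.csv.CSVUtils;

public class UnescapeSketch {
    public static void main(String[] args) {
        // A property value of backslash-t is unescaped to a real tab character,
        // so SingleCharacterValidator sees exactly one character and accepts it.
        String tab = CSVUtils.unescape("\\t");
        System.out.println(tab.length());         // 1
        System.out.println((int) tab.charAt(0));  // 9

        // A plain two-character value stays two characters and is rejected.
        System.out.println(CSVUtils.unescape("ab").length());  // 2

        // Note: UNESCAPED_SINGLE_CHAR_VALIDATOR instead applies
        // StringEscapeUtils.unescapeJava to inputs longer than one character.
    }
}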

http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
index ce0a9b2..432967b 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
@@ -17,7 +17,7 @@
     <modelVersion>4.0.0</modelVersion>
 
     <properties>
-        <poi.version>3.14</poi.version>
+        <poi.version>3.17</poi.version>
     </properties>
 
     <parent>
@@ -66,7 +66,6 @@
             <artifactId>poi-ooxml</artifactId>
             <version>${poi.version}</version>
         </dependency>
-
         <dependency>
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-api</artifactId>
@@ -77,6 +76,10 @@
         </dependency>
         <dependency>
             <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-standard-record-utils</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-mock</artifactId>
             <scope>test</scope>
         </dependency>

http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
index 6d8274b..1e0df88 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
@@ -19,14 +19,16 @@ package org.apache.nifi.processors.poi;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.nifi.annotation.behavior.WritesAttribute;
@@ -34,6 +36,7 @@ import org.apache.nifi.annotation.behavior.WritesAttributes;
 import org.apache.nifi.annotation.documentation.CapabilityDescription;
 import org.apache.nifi.annotation.documentation.Tags;
 import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.csv.CSVUtils;
 import org.apache.nifi.flowfile.FlowFile;
 import org.apache.nifi.flowfile.attributes.CoreAttributes;
 import org.apache.nifi.processor.AbstractProcessor;
@@ -48,15 +51,20 @@ import org.apache.nifi.processor.util.StandardValidators;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.ss.util.CellReference;
+import org.apache.poi.util.SAXHelper;
+import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
 import org.apache.poi.xssf.eventusermodel.XSSFReader;
-import org.apache.poi.xssf.model.SharedStringsTable;
-import org.apache.poi.xssf.usermodel.XSSFRichTextString;
-import org.xml.sax.Attributes;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
+import org.apache.poi.xssf.model.StylesTable;
+import org.apache.poi.xssf.usermodel.XSSFComment;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
-import org.xml.sax.helpers.XMLReaderFactory;
+
+import javax.xml.parsers.ParserConfigurationException;
 
 
 @Tags({"excel", "csv", "poi"})
@@ -78,17 +86,8 @@ public class ConvertExcelToCSVProcessor
     public static final String SHEET_NAME = "sheetname";
     public static final String ROW_NUM = "numrows";
     public static final String SOURCE_FILE_NAME = "sourcefilename";
-    private static final String SAX_CELL_REF = "c";
-    private static final String SAX_CELL_TYPE = "t";
-    private static final String SAX_CELL_ADDRESS = "r";
-    private static final String SAX_CELL_STRING = "s";
-    private static final String SAX_CELL_CONTENT_REF = "v";
-    private static final String SAX_ROW_REF = "row";
-    private static final String SAX_SHEET_NAME_REF = "sheetPr";
     private static final String DESIRED_SHEETS_DELIMITER = ",";
     private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";
-    private static final String SAX_PARSER = "org.apache.xerces.parsers.SAXParser";
-    private static final Pattern CELL_ADDRESS_REGEX = Pattern.compile("^([a-zA-Z]+)([\\d]+)$");
 
     public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor
             .Builder().name("extract-sheets")
@@ -101,6 +100,35 @@ public class ConvertExcelToCSVProcessor
             .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
             .build();
 
+    public static final PropertyDescriptor ROWS_TO_SKIP = new PropertyDescriptor
+            .Builder().name("excel-extract-first-row")
+            .displayName("Number of Rows to Skip")
+            .description("The row number of the first row to start processing."
+                    + "Use this to skip over rows of data at the top of your worksheet that are not part of the dataset."
+                    + "Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.")
+            .required(true)
+            .defaultValue("0")
+            .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
+            .build();
+
+    public static final PropertyDescriptor COLUMNS_TO_SKIP = new PropertyDescriptor
+            .Builder().name("excel-extract-column-to-skip")
+            .displayName("Columns To Skip")
+            .description("Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. "
+                    + "Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.")
+            .required(false)
+            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+            .build();
+
+    public static final PropertyDescriptor FORMAT_VALUES = new PropertyDescriptor.Builder()
+            .name("excel-format-values")
+            .displayName("Format Cell Values")
+            .description("Should the cell values be written to CSV using the formatting applied in Excel, or should they be printed as raw values.")
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .required(true)
+            .build();
+
     public static final Relationship ORIGINAL = new Relationship.Builder()
             .name("original")
             .description("Original Excel document received by this processor")
@@ -124,6 +152,24 @@ public class ConvertExcelToCSVProcessor
     protected void init(final ProcessorInitializationContext context) {
         final List<PropertyDescriptor> descriptors = new ArrayList<>();
         descriptors.add(DESIRED_SHEETS);
+        descriptors.add(ROWS_TO_SKIP);
+        descriptors.add(COLUMNS_TO_SKIP);
+        descriptors.add(FORMAT_VALUES);
+
+        descriptors.add(CSVUtils.CSV_FORMAT);
+        descriptors.add(CSVUtils.VALUE_SEPARATOR);
+        descriptors.add(CSVUtils.INCLUDE_HEADER_LINE);
+        descriptors.add(CSVUtils.QUOTE_CHAR);
+        descriptors.add(CSVUtils.ESCAPE_CHAR);
+        descriptors.add(CSVUtils.COMMENT_MARKER);
+        descriptors.add(CSVUtils.NULL_STRING);
+        descriptors.add(CSVUtils.TRIM_FIELDS);
+        descriptors.add(new PropertyDescriptor.Builder()
+                    .fromPropertyDescriptor(CSVUtils.QUOTE_MODE)
+                    .defaultValue(CSVUtils.QUOTE_NONE.getValue())
+                    .build());
+        descriptors.add(CSVUtils.RECORD_SEPARATOR);
+        descriptors.add(CSVUtils.TRAILING_DELIMITER);
         this.descriptors = Collections.unmodifiableList(descriptors);
 
         final Set<Relationship> relationships = new HashSet<>();
@@ -150,28 +196,46 @@ public class ConvertExcelToCSVProcessor
             return;
         }
 
-        try {
+        final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions().getValue();
+        final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();
+
+        final CSVFormat csvFormat = CSVUtils.createCSVFormat(context);
+
+        //Switch to 0 based index
+        final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1;
+        final String[] sColumnsToSkip = StringUtils
+                .split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ",");
+
+        final List<Integer> columnsToSkip = new ArrayList<>();
+
+        if(sColumnsToSkip != null && sColumnsToSkip.length > 0) {
+            for (String c : sColumnsToSkip) {
+                try {
+                    //Switch to 0 based index
+                    columnsToSkip.add(Integer.parseInt(c) - 1);
+                } catch (NumberFormatException e) {
+                    throw new ProcessException("Invalid column in Columns to Skip list.", e);
+                }
+            }
+        }
 
+        try {
             session.read(flowFile, new InputStreamCallback() {
                 @Override
                 public void process(InputStream inputStream) throws IOException {
 
                     try {
-                        String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS)
-                                .evaluateAttributeExpressions().getValue();
-
                         OPCPackage pkg = OPCPackage.open(inputStream);
                         XSSFReader r = new XSSFReader(pkg);
-                        SharedStringsTable sst = r.getSharedStringsTable();
+                        ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg);
+                        StylesTable styles = r.getStylesTable();
                         XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData();
 
                         if (desiredSheetsDelimited != null) {
-
                             String[] desiredSheets = StringUtils
                                     .split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER);
 
                             if (desiredSheets != null) {
-
                                 while (iter.hasNext()) {
                                     InputStream sheet = iter.next();
                                     String sheetName = iter.getSheetName();
@@ -179,7 +243,8 @@ public class ConvertExcelToCSVProcessor
                                     for (int i = 0; i < desiredSheets.length; i++) {
                                         //If the sheetName is a desired one parse it
                                         if (sheetName.equalsIgnoreCase(desiredSheets[i])) {
-                                            handleExcelSheet(session, flowFile, sst, sheet, sheetName);
+                                            ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles);
+                                            handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                                             break;
                                         }
                                     }
@@ -191,13 +256,17 @@ public class ConvertExcelToCSVProcessor
                         } else {
                             //Get all of the sheets in the document.
                             while (iter.hasNext()) {
-                                handleExcelSheet(session, flowFile, sst, iter.next(), iter.getSheetName());
+                                InputStream sheet = iter.next();
+                                String sheetName = iter.getSheetName();
+
+                                ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheetName, formatValues, sst, styles);
+                                handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
                             }
                         }
                     } catch (InvalidFormatException ife) {
                         getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife);
                         throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", ife);
-                    } catch (OpenXML4JException e) {
+                    } catch (OpenXML4JException | SAXException e) {
                         getLogger().error("Error occurred while processing Excel document metadata", e);
                     }
                 }
@@ -206,7 +275,7 @@ public class ConvertExcelToCSVProcessor
             session.transfer(flowFile, ORIGINAL);
 
         } catch (RuntimeException ex) {
-            getLogger().error("Failed to process incoming Excel document", ex);
+            getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex);
             FlowFile failedFlowFile = session.putAttribute(flowFile,
                     ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
             session.transfer(failedFlowFile, FAILURE);
@@ -220,45 +289,48 @@ public class ConvertExcelToCSVProcessor
      * @param session
      *  The NiFi ProcessSession instance for the current invocation.
      */
-    private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF,
-            SharedStringsTable sst, final InputStream sheetInputStream, String sName) throws IOException {
+    private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final InputStream sheetInputStream, ExcelSheetReadConfig readConfig,
+                                  CSVFormat csvFormat) throws IOException {
 
         FlowFile ff = session.create();
         try {
+            final DataFormatter formatter = new DataFormatter();
+            final InputSource sheetSource = new InputSource(sheetInputStream);
+
+            final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);
+
+            final XMLReader parser = SAXHelper.newXMLReader();
+
+            //If Value Formatting is set to false then don't pass in the styles table.
+            // This will cause the XSSF Handler to return the raw value instead of the formatted one.
+            final StylesTable sst = readConfig.getFormatValues()?readConfig.getStyles():null;
+
+            final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(
+                    sst, null, readConfig.getSharedStringsTable(), sheetHandler, formatter, false);
 
-            XMLReader parser =
-                    XMLReaderFactory.createXMLReader(
-                            SAX_PARSER
-                    );
-            ExcelSheetRowHandler handler = new ExcelSheetRowHandler(sst);
             parser.setContentHandler(handler);
 
             ff = session.write(ff, new OutputStreamCallback() {
                 @Override
                 public void process(OutputStream out) throws IOException {
-                    InputSource sheetSource = new InputSource(sheetInputStream);
-                    ExcelSheetRowHandler eh = null;
+                    PrintStream outPrint = new PrintStream(out);
+                    sheetHandler.setOutput(outPrint);
+
                     try {
-                        eh = (ExcelSheetRowHandler) parser.getContentHandler();
-                        eh.setFlowFileOutputStream(out);
-                        parser.setContentHandler(eh);
                         parser.parse(sheetSource);
+
                         sheetInputStream.close();
+
+                        sheetHandler.close();
+                        outPrint.close();
                     } catch (SAXException se) {
-                        getLogger().error("Error occurred while processing Excel sheet {}", new Object[]{eh.getSheetName()}, se);
+                        getLogger().error("Error occurred while processing Excel sheet {}", new Object[]{readConfig.getSheetName()}, se);
                     }
                 }
             });
 
-            if (handler.getSheetName().equals(UNKNOWN_SHEET_NAME)) {
-                //Used the named parsed from the handler. This logic is only here because IF the handler does find a value that should take precedence.
-                ff = session.putAttribute(ff, SHEET_NAME, sName);
-            } else {
-                ff = session.putAttribute(ff, SHEET_NAME, handler.getSheetName());
-                sName = handler.getSheetName();
-            }
-
-            ff = session.putAttribute(ff, ROW_NUM, new Long(handler.getRowCount()).toString());
+            ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName());
+            ff = session.putAttribute(ff, ROW_NUM, new Long(sheetHandler.getRowCount()).toString());
 
             if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) {
                 ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key()));
@@ -268,13 +340,13 @@ public class ConvertExcelToCSVProcessor
 
             //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE
             ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
-                    ff.getAttribute(CoreAttributes.FILENAME.key()), sName));
+                    ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName()));
             ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
 
             session.transfer(ff, SUCCESS);
 
-        } catch (SAXException saxE) {
-            getLogger().error("Failed to create instance of SAXParser {}", new Object[]{SAX_PARSER}, saxE);
+        } catch (SAXException | ParserConfigurationException saxE) {
+            getLogger().error("Failed to create instance of Parser.", saxE);
             ff = session.putAttribute(ff,
                     ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage());
             session.transfer(ff, FAILURE);
@@ -283,162 +355,161 @@ public class ConvertExcelToCSVProcessor
         }
     }
 
-    static Integer columnToIndex(String col) {
-        int length = col.length();
-        int accumulator = 0;
-        for (int i = length; i > 0; i--) {
-            char c = col.charAt(i - 1);
-            int x = ((int) c) - 64;
-            accumulator += x * Math.pow(26, length - i);
+    /**
+     * Uses the XSSF Event SAX helpers to do most of the work
+     *  of parsing the Sheet XML, and outputs the contents
+     *  as a (basic) CSV.
+     */
+    private class SheetToCSV implements XSSFSheetXMLHandler.SheetContentsHandler {
+        private ExcelSheetReadConfig readConfig;
+        CSVFormat csvFormat;
+
+        private boolean firstCellOfRow;
+        private boolean skipRow;
+        private int currentRow = -1;
+        private int currentCol = -1;
+        private int rowCount = 0;
+        private boolean rowHasValues=false;
+        private int skippedColumns=0;
+
+        private CSVPrinter printer;
+
+        private boolean firstRow=false;
+
+        private ArrayList<Object> fieldValues;
+
+        public int getRowCount(){
+            return rowCount;
         }
-        // Make it to start with 0.
-        return accumulator - 1;
-    }
 
-    private static class CellAddress {
-        final int row;
-        final int col;
+        public void setOutput(PrintStream output){
+            final OutputStreamWriter streamWriter = new OutputStreamWriter(output);
 
-        private CellAddress(int row, int col) {
-            this.row = row;
-            this.col = col;
+            try {
+                printer = new CSVPrinter(streamWriter, csvFormat);
+            } catch (IOException e) {
+                throw new ProcessException("Failed to create CSV Printer.", e);
+            }
         }
-    }
 
-    /**
-     * Extracts every row from an Excel Sheet and generates a corresponding JSONObject whose key is the Excel CellAddress and value
-     * is the content of that CellAddress converted to a String
-     */
-    private class ExcelSheetRowHandler
-            extends DefaultHandler {
-
-        private SharedStringsTable sst;
-        private String currentContent;
-        private boolean nextIsString;
-        private CellAddress firstCellAddress;
-        private CellAddress firstRowLastCellAddress;
-        private CellAddress previousCellAddress;
-        private CellAddress nextCellAddress;
-        private OutputStream outputStream;
-        private boolean firstColInRow;
-        long rowCount;
-        String sheetName;
-
-        private ExcelSheetRowHandler(SharedStringsTable sst) {
-            this.sst = sst;
-            this.firstColInRow = true;
-            this.rowCount = 0l;
-            this.sheetName = UNKNOWN_SHEET_NAME;
+        public SheetToCSV(ExcelSheetReadConfig readConfig, CSVFormat csvFormat){
+            this.readConfig = readConfig;
+            this.csvFormat = csvFormat;
         }
 
-        public void setFlowFileOutputStream(OutputStream outputStream) {
-            this.outputStream = outputStream;
+        @Override
+        public void startRow(int rowNum) {
+            if(rowNum <= readConfig.getOverrideFirstRow()) {
+                skipRow = true;
+                return;
+            }
+
+            // Prepare for this row
+            skipRow = false;
+            firstCellOfRow = true;
+            firstRow = currentRow==-1;
+            currentRow = rowNum;
+            currentCol = -1;
+            rowHasValues = false;
+
+            fieldValues = new ArrayList<>();
         }
 
+        @Override
+        public void endRow(int rowNum) {
+            if(skipRow) {
+                return;
+            }
 
-        public void startElement(String uri, String localName, String name,
-                Attributes attributes) throws SAXException {
+            if(firstRow){
+                readConfig.setLastColumn(currentCol);
+            }
 
-            if (name.equals(SAX_CELL_REF)) {
-                String cellType = attributes.getValue(SAX_CELL_TYPE);
-                // Analyze cell address.
-                Matcher cellAddressMatcher = CELL_ADDRESS_REGEX.matcher(attributes.getValue(SAX_CELL_ADDRESS));
-                if (cellAddressMatcher.matches()) {
-                    String col = cellAddressMatcher.group(1);
-                    String row = cellAddressMatcher.group(2);
-                    nextCellAddress = new CellAddress(Integer.parseInt(row), columnToIndex(col));
+            //if there was no data in this row, don't write it
+            if(!rowHasValues) {
+                return;
+            }
 
-                    if (firstCellAddress == null) {
-                        firstCellAddress = nextCellAddress;
-                    }
-                }
-                if (cellType != null && cellType.equals(SAX_CELL_STRING)) {
-                    nextIsString = true;
-                } else {
-                    nextIsString = false;
-                }
-            } else if (name.equals(SAX_ROW_REF)) {
-                if (firstRowLastCellAddress == null) {
-                    firstRowLastCellAddress = previousCellAddress;
-                }
-                firstColInRow = true;
-                previousCellAddress = null;
-                nextCellAddress = null;
-            } else if (name.equals(SAX_SHEET_NAME_REF)) {
-                sheetName = attributes.getValue(0);
+            // Ensure the correct number of columns
+            int columnsToAdd = (readConfig.getLastColumn() - currentCol) - readConfig.getColumnsToSkip().size();
+            for (int i=0; i<columnsToAdd; i++) {
+                fieldValues.add(null);
             }
 
-            currentContent = "";
+            try {
+                printer.printRecord(fieldValues);
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+
+            rowCount++;
         }
 
-        private void fillEmptyColumns(int nextColumn) throws IOException {
-            final CellAddress previousCell = previousCellAddress != null ? previousCellAddress : firstCellAddress;
-            if (previousCell != null) {
-                for (int i = 0; i < (nextColumn - previousCell.col); i++) {
-                    // Fill columns.
-                    outputStream.write(",".getBytes());
-                }
+        @Override
+        public void cell(String cellReference, String formattedValue,
+                         XSSFComment comment) {
+            if(skipRow) {
+                return;
             }
-        }
 
-        public void endElement(String uri, String localName, String name)
-                throws SAXException {
+            // gracefully handle missing CellRef here in a similar way as XSSFCell does
+            if(cellReference == null) {
+                cellReference = new CellAddress(currentRow, currentCol).formatAsString();
+            }
+
+            // Did we miss any cells?
+            int thisCol = (new CellReference(cellReference)).getCol();
+
+            // Should we skip this cell?
 
-            if (nextIsString) {
-                int idx = Integer.parseInt(currentContent);
-                currentContent = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
-                nextIsString = false;
+            //Use the first row of the file to decide on the area of data to export
+            if(firstRow && firstCellOfRow){
+                readConfig.setFirstRow(currentRow);
+                readConfig.setFirstColumn(thisCol);
             }
 
-            if (name.equals(SAX_CELL_CONTENT_REF)
-                    // Limit scanning from the first column, and up to the last column.
-                    && (firstCellAddress == null || firstCellAddress.col <= nextCellAddress.col)
-                    && (firstRowLastCellAddress == null || nextCellAddress.col <= firstRowLastCellAddress.col)) {
-                try {
-                    // A cell is found.
-                    fillEmptyColumns(nextCellAddress.col);
-                    firstColInRow = false;
-                    outputStream.write(currentContent.getBytes());
-                    // Keep previously found cell address.
-                    previousCellAddress = nextCellAddress;
-                } catch (IOException e) {
-                    getLogger().error("IO error encountered while writing content of parsed cell " +
-                            "value from sheet {}", new Object[]{getSheetName()}, e);
-                }
+            //if this cell falls outside our area, or has been explicitly marked as a skipped column, return and don't write it out.
+            if(!firstRow && (thisCol < readConfig.getFirstColumn() || thisCol > readConfig.getLastColumn())){
+                return;
             }
 
-            if (name.equals(SAX_ROW_REF)) {
-                //If this is the first row and the end of the row element has been encountered then that means no columns were present.
-                if (!firstColInRow) {
-                    try {
-                        if (firstRowLastCellAddress != null) {
-                            fillEmptyColumns(firstRowLastCellAddress.col);
-                        }
-                        rowCount++;
-                        outputStream.write("\n".getBytes());
-                    } catch (IOException e) {
-                        getLogger().error("IO error encountered while writing new line indicator", e);
-                    }
-                }
+            if(readConfig.getColumnsToSkip().contains(thisCol)){
+                skippedColumns++;
+                return;
             }
 
-        }
+            int missedCols = (thisCol - readConfig.getFirstColumn()) - (currentCol - readConfig.getFirstColumn()) - 1;
+            if(firstCellOfRow){
+                missedCols = (thisCol - readConfig.getFirstColumn());
+            }
+
+            missedCols -= skippedColumns;
+
+            if (firstCellOfRow) {
+                firstCellOfRow = false;
+            }
 
-        public void characters(char[] ch, int start, int length)
-                throws SAXException {
-            currentContent += new String(ch, start, length);
+            for (int i=0; i<missedCols; i++) {
+                fieldValues.add(null);
+            }
+            currentCol = thisCol;
+
+            fieldValues.add(formattedValue);
+
+            rowHasValues = true;
+            skippedColumns = 0;
         }
 
-        public long getRowCount() {
-            return rowCount;
+        @Override
+        public void headerFooter(String s, boolean b, String s1) {
+
         }
 
-        public String getSheetName() {
-            return sheetName;
+        public void close() throws IOException {
+            printer.close();
         }
     }
 
-
     /**
      * Takes the original input filename and updates it by removing the file extension and replacing it with
      * the .csv extension.
@@ -472,4 +543,87 @@ public class ConvertExcelToCSVProcessor
         return stringBuilder.toString();
     }
 
+    private class ExcelSheetReadConfig {
+        public String getSheetName(){
+            return sheetName;
+        }
+
+        public int getFirstColumn(){
+            return firstColumn;
+        }
+
+        public void setFirstColumn(int value){
+            this.firstColumn = value;
+        }
+
+        public int getLastColumn(){
+            return lastColumn;
+        }
+
+        public void setLastColumn(int lastColumn) {
+            this.lastColumn = lastColumn;
+        }
+
+        public int getOverrideFirstRow(){
+            return overrideFirstRow;
+        }
+
+        public boolean getFormatValues() {
+            return formatValues;
+        }
+
+        public int getFirstRow(){
+            return firstRow;
+        }
+
+        public void setFirstRow(int value){
+            firstRow = value;
+        }
+
+        public int getLastRow(){
+            return lastRow;
+        }
+
+        public void setLastRow(int value){
+            lastRow = value;
+        }
+
+        public List<Integer> getColumnsToSkip(){
+            return columnsToSkip;
+        }
+
+        public ReadOnlySharedStringsTable getSharedStringsTable(){
+            return sst;
+        }
+
+        public StylesTable getStyles(){
+            return styles;
+        }
+
+        private int firstColumn;
+        private int lastColumn;
+
+        private int firstRow;
+        private int lastRow;
+        private int overrideFirstRow;
+        private String sheetName;
+        private boolean formatValues;
+
+        private ReadOnlySharedStringsTable sst;
+        private StylesTable styles;
+
+        private List<Integer> columnsToSkip;
+
+        public ExcelSheetReadConfig(List<Integer> columnsToSkip, int overrideFirstRow, String sheetName, boolean formatValues,
+                                    ReadOnlySharedStringsTable sst, StylesTable styles){
+
+            this.sheetName = sheetName;
+            this.columnsToSkip = columnsToSkip;
+            this.overrideFirstRow = overrideFirstRow;
+            this.formatValues = formatValues;
+
+            this.sst = sst;
+            this.styles = styles;
+        }
+    }
 }
\ No newline at end of file
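
For orientation, a minimal standalone sketch of the POI streaming pattern that replaces the old hand-rolled SAX handler: XSSFReader supplies per-sheet XML streams and XSSFSheetXMLHandler pushes formatted cell values into a SheetContentsHandler, the same classes handleExcelSheet() wires together above. The file name and the printing handler are illustrative assumptions, not part of the commit.

import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

public class SheetStreamSketch {

    // Trivial SheetContentsHandler: prints each formatted cell value, tab separated.
    static class PrintingHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
        @Override public void startRow(int rowNum) { }
        @Override public void endRow(int rowNum) { System.out.println(); }
        @Override public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            System.out.print(formattedValue + "\t");
        }
        @Override public void headerFooter(String text, boolean isHeader, String tagName) { }
    }

    public static void main(String[] args) throws Exception {
        OPCPackage pkg = OPCPackage.open(new FileInputStream("example.xlsx"));  // illustrative file name
        XSSFReader reader = new XSSFReader(pkg);
        ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg);
        StylesTable styles = reader.getStylesTable();  // pass null to the handler below for raw, unformatted values

        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        while (sheets.hasNext()) {
            try (InputStream sheet = sheets.next()) {
                System.out.println("Sheet: " + sheets.getSheetName());
                XMLReader parser = SAXHelper.newXMLReader();
                parser.setContentHandler(new XSSFSheetXMLHandler(
                        styles, null, sst, new PrintingHandler(), new DataFormatter(), false));
                parser.parse(new InputSource(sheet));
            }
        }
        pkg.revert();  // read-only use; discard without saving
    }
}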

http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
index 1df2568..9e9131f 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
@@ -20,9 +20,9 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.File;
-import java.nio.charset.StandardCharsets;
 import java.util.List;
 
+import org.apache.nifi.csv.CSVUtils;
 import org.apache.nifi.flowfile.attributes.CoreAttributes;
 import org.apache.nifi.util.LogMessage;
 import org.apache.nifi.util.MockFlowFile;
@@ -42,16 +42,6 @@ public class ConvertExcelToCSVProcessorTest {
     }
 
     @Test
-    public void testColToIndex() {
-        assertEquals(Integer.valueOf(0), ConvertExcelToCSVProcessor.columnToIndex("A"));
-        assertEquals(Integer.valueOf(1), ConvertExcelToCSVProcessor.columnToIndex("B"));
-        assertEquals(Integer.valueOf(25), ConvertExcelToCSVProcessor.columnToIndex("Z"));
-        assertEquals(Integer.valueOf(29), ConvertExcelToCSVProcessor.columnToIndex("AD"));
-        assertEquals(Integer.valueOf(239), ConvertExcelToCSVProcessor.columnToIndex("IF"));
-        assertEquals(Integer.valueOf(16383), ConvertExcelToCSVProcessor.columnToIndex("XFD"));
-    }
-
-    @Test
     public void testMultipleSheetsGeneratesMultipleFlowFiles() throws Exception {
 
         testRunner.enqueue(new File("src/test/resources/TwoSheets.xlsx").toPath());
@@ -81,6 +71,144 @@ public class ConvertExcelToCSVProcessorTest {
 
     }
 
+    @Test
+    public void testDataFormatting() throws Exception {
+        testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());
+
+        testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "false");
+
+        testRunner.run();
+
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+        MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+        Long rowsSheet = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+        assertTrue(rowsSheet == 9);
+
+        ff.assertContentEquals("Numbers,Timestamps,Money\n" +
+                "1234.4559999999999,42736.5,123.45\n" +
+                "1234.4559999999999,42736.5,123.45\n" +
+                "1234.4559999999999,42736.5,123.45\n" +
+                "1234.4559999999999,42736.5,1023.45\n" +
+                "1234.4559999999999,42736.5,1023.45\n" +
+                "987654321,42736.5,1023.45\n" +
+                "987654321,,\n" +
+                "987654321,,\n");
+    }
+
+    @Test
+    public void testQuoting() throws Exception {
+        testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());
+
+        testRunner.setProperty(CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_MINIMAL);
+        testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
+
+        testRunner.run();
+
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+        MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+        Long rowsSheet = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+        assertTrue(rowsSheet == 9);
+
+        ff.assertContentEquals("Numbers,Timestamps,Money\n" +
+                "1234.456,1/1/17,$   123.45\n" +
+                "1234.46,12:00:00 PM,£   123.45\n" +
+                "1234.5,\"Sunday, January 01, 2017\",¥   123.45\n" +
+                "\"1,234.46\",1/1/17 12:00,\"$   1,023.45\"\n" +
+                "\"1,234.4560\",12:00 PM,\"£   1,023.45\"\n" +
+                "9.88E+08,2017/01/01/ 12:00,\"¥   1,023.45\"\n" +
+                "9.877E+08,,\n" +
+                "9.8765E+08,,\n");
+    }
+
+    @Test
+    public void testSkipRows() throws Exception {
+        testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());
+
+        testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP, "2");
+        testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
+
+        testRunner.run();
+
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+        MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+        Long rowsSheet = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+        assertEquals("Row count does match expected value.", "7", rowsSheet.toString());
+
+        ff.assertContentEquals("1234.46,12:00:00 PM,£   123.45\n" +
+                "1234.5,Sunday\\, January 01\\, 2017,¥   123.45\n" +
+                "1\\,234.46,1/1/17 12:00,$   1\\,023.45\n" +
+                "1\\,234.4560,12:00 PM,£   1\\,023.45\n" +
+                "9.88E+08,2017/01/01/ 12:00,¥   1\\,023.45\n" +
+                "9.877E+08,,\n" +
+                "9.8765E+08,,\n");
+    }
+
+    @Test
+    public void testSkipColumns() throws Exception {
+        testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());
+
+        testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP, "2");
+        testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
+
+        testRunner.run();
+
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+        MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+        Long rowsSheet = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+        assertTrue(rowsSheet == 9);
+
+        ff.assertContentEquals("Numbers,Money\n" +
+                "1234.456,$   123.45\n" +
+                "1234.46,£   123.45\n" +
+                "1234.5,¥   123.45\n" +
+                "1\\,234.46,$   1\\,023.45\n" +
+                "1\\,234.4560,£   1\\,023.45\n" +
+                "9.88E+08,¥   1\\,023.45\n" +
+                "9.877E+08,\n" +
+                "9.8765E+08,\n");
+    }
+
+    @Test
+    public void testCustomDelimiters() throws Exception {
+        testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());
+
+        testRunner.setProperty(CSVUtils.VALUE_SEPARATOR, "|");
+        testRunner.setProperty(CSVUtils.RECORD_SEPARATOR, "\\r\\n");
+        testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
+
+        testRunner.run();
+
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+        testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+        MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+        Long rowsSheet = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+        assertTrue(rowsSheet == 9);
+
+        ff.assertContentEquals("Numbers|Timestamps|Money\r\n" +
+                "1234.456|1/1/17|$   123.45\r\n" +
+                "1234.46|12:00:00 PM|£   123.45\r\n" +
+                "1234.5|Sunday, January 01, 2017|¥   123.45\r\n" +
+                "1,234.46|1/1/17 12:00|$   1,023.45\r\n" +
+                "1,234.4560|12:00 PM|£   1,023.45\r\n" +
+                "9.88E+08|2017/01/01/ 12:00|¥   1,023.45\r\n" +
+                "9.877E+08||\r\n" +
+                "9.8765E+08||\r\n");
+    }
+
     /**
      * Validates that all sheets in the Excel document are exported.
      *
@@ -181,7 +309,7 @@ public class ConvertExcelToCSVProcessorTest {
         MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
         Long l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
         assertTrue(l == 8l);
-        ff.isContentEqual("test", StandardCharsets.UTF_8);
+
         ff.assertContentEquals(new File("src/test/resources/with-blank-cells.csv"));
     }
 
@@ -199,8 +327,8 @@ public class ConvertExcelToCSVProcessorTest {
         testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 1);
 
         List<LogMessage> errorMessages = testRunner.getLogger().getErrorMessages();
-        Assert.assertEquals(2, errorMessages.size());
+        Assert.assertEquals(1, errorMessages.size());
         String messageText = errorMessages.get(0).getMsg();
-        Assert.assertTrue(messageText.contains("Excel") && messageText.contains("supported"));
+        Assert.assertTrue(messageText.contains("Excel") && messageText.contains("OLE2"));
     }
 }
\ No newline at end of file
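One detail of testCustomDelimiters worth calling out: the Record Separator property is set to the escaped string "\\r\\n", yet the assertions expect a literal CRLF between records. That works because CSVUtils.unescape (shown in the deleted copy of CSVUtils further down in this diff) translates the escape sequences before the CSVFormat is built. A short, self-contained sketch, assuming the relocated CSVUtils is on the classpath:

    import org.apache.nifi.csv.CSVUtils;

    // Illustrative only: the escaped property value becomes real control characters.
    public class SeparatorSketch {
        public static void main(String[] args) {
            String propertyValue = "\\r\\n";                 // what the test sets
            String separator = CSVUtils.unescape(propertyValue);
            System.out.println(separator.equals("\r\n"));    // true: a real CRLF
        }
    }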

http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx
new file mode 100644
index 0000000..a9428e2
Binary files /dev/null and b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx differ

http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
deleted file mode 100644
index 17152aa..0000000
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nifi.csv;
-
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.QuoteMode;
-import org.apache.commons.lang3.StringEscapeUtils;
-import org.apache.nifi.components.AllowableValue;
-import org.apache.nifi.components.PropertyDescriptor;
-import org.apache.nifi.components.PropertyValue;
-import org.apache.nifi.controller.ConfigurationContext;
-import org.apache.nifi.processor.util.StandardValidators;
-
-public class CSVUtils {
-
-    static final AllowableValue CUSTOM = new AllowableValue("custom", "Custom Format",
-        "The format of the CSV is configured by using the properties of this Controller Service, such as Value Separator");
-    static final AllowableValue RFC_4180 = new AllowableValue("rfc-4180", "RFC 4180", "CSV data follows the RFC 4180 Specification defined at https://tools.ietf.org/html/rfc4180");
-    static final AllowableValue EXCEL = new AllowableValue("excel", "Microsoft Excel", "CSV data follows the format used by Microsoft Excel");
-    static final AllowableValue TDF = new AllowableValue("tdf", "Tab-Delimited", "CSV data is Tab-Delimited instead of Comma Delimited");
-    static final AllowableValue INFORMIX_UNLOAD = new AllowableValue("informix-unload", "Informix Unload", "The format used by Informix when issuing the UNLOAD TO file_name command");
-    static final AllowableValue INFORMIX_UNLOAD_CSV = new AllowableValue("informix-unload-csv", "Informix Unload Escape Disabled",
-        "The format used by Informix when issuing the UNLOAD TO file_name command with escaping disabled");
-    static final AllowableValue MYSQL = new AllowableValue("mysql", "MySQL Format", "CSV data follows the format used by MySQL");
-
-    static final PropertyDescriptor CSV_FORMAT = new PropertyDescriptor.Builder()
-        .name("CSV Format")
-        .description("Specifies which \"format\" the CSV data is in, or specifies if custom formatting should be used.")
-        .expressionLanguageSupported(false)
-        .allowableValues(CUSTOM, RFC_4180, EXCEL, TDF, MYSQL, INFORMIX_UNLOAD, INFORMIX_UNLOAD_CSV)
-        .defaultValue(CUSTOM.getValue())
-        .required(true)
-        .build();
-    static final PropertyDescriptor VALUE_SEPARATOR = new PropertyDescriptor.Builder()
-        .name("Value Separator")
-        .description("The character that is used to separate values/fields in a CSV Record")
-        .addValidator(CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR)
-        .expressionLanguageSupported(false)
-        .defaultValue(",")
-        .required(true)
-        .build();
-    static final PropertyDescriptor QUOTE_CHAR = new PropertyDescriptor.Builder()
-        .name("Quote Character")
-        .description("The character that is used to quote values so that escape characters do not have to be used")
-        .addValidator(new CSVValidators.SingleCharacterValidator())
-        .expressionLanguageSupported(false)
-        .defaultValue("\"")
-        .required(true)
-        .build();
-    static final PropertyDescriptor FIRST_LINE_IS_HEADER = new PropertyDescriptor.Builder()
-        .name("Skip Header Line")
-        .displayName("Treat First Line as Header")
-        .description("Specifies whether or not the first line of CSV should be considered a Header or should be considered a record. If the Schema Access Strategy "
-            + "indicates that the columns must be defined in the header, then this property will be ignored, since the header must always be "
-            + "present and won't be processed as a Record. Otherwise, if 'true', then the first line of CSV data will not be processed as a record and if 'false',"
-            + "then the first line will be interpreted as a record.")
-        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
-        .expressionLanguageSupported(false)
-        .allowableValues("true", "false")
-        .defaultValue("false")
-        .required(true)
-        .build();
-    static final PropertyDescriptor IGNORE_CSV_HEADER = new PropertyDescriptor.Builder()
-        .name("ignore-csv-header")
-        .displayName("Ignore CSV Header Column Names")
-        .description("If the first line of a CSV is a header, and the configured schema does not match the fields named in the header line, this controls how "
-            + "the Reader will interpret the fields. If this property is true, then the field names mapped to each column are driven only by the configured schema and "
-            + "any fields not in the schema will be ignored. If this property is false, then the field names found in the CSV Header will be used as the names of the "
-            + "fields.")
-        .expressionLanguageSupported(false)
-        .allowableValues("true", "false")
-        .defaultValue("false")
-        .required(false)
-        .build();
-    static final PropertyDescriptor COMMENT_MARKER = new PropertyDescriptor.Builder()
-        .name("Comment Marker")
-        .description("The character that is used to denote the start of a comment. Any line that begins with this comment will be ignored.")
-        .addValidator(new CSVValidators.SingleCharacterValidator())
-        .expressionLanguageSupported(false)
-        .required(false)
-        .build();
-    static final PropertyDescriptor ESCAPE_CHAR = new PropertyDescriptor.Builder()
-        .name("Escape Character")
-        .description("The character that is used to escape characters that would otherwise have a specific meaning to the CSV Parser.")
-        .addValidator(new CSVValidators.SingleCharacterValidator())
-        .expressionLanguageSupported(false)
-        .defaultValue("\\")
-        .required(true)
-        .build();
-    static final PropertyDescriptor NULL_STRING = new PropertyDescriptor.Builder()
-        .name("Null String")
-        .description("Specifies a String that, if present as a value in the CSV, should be considered a null field instead of using the literal value.")
-        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
-        .expressionLanguageSupported(false)
-        .required(false)
-        .build();
-    static final PropertyDescriptor TRIM_FIELDS = new PropertyDescriptor.Builder()
-        .name("Trim Fields")
-        .description("Whether or not white space should be removed from the beginning and end of fields")
-        .expressionLanguageSupported(false)
-        .allowableValues("true", "false")
-        .defaultValue("true")
-        .required(true)
-        .build();
-
-    // CSV Format fields for writers only
-    static final AllowableValue QUOTE_ALL = new AllowableValue("ALL", "Quote All Values", "All values will be quoted using the configured quote character.");
-    static final AllowableValue QUOTE_MINIMAL = new AllowableValue("MINIMAL", "Quote Minimal",
-        "Values will be quoted only if they are contain special characters such as newline characters or field separators.");
-    static final AllowableValue QUOTE_NON_NUMERIC = new AllowableValue("NON_NUMERIC", "Quote Non-Numeric Values", "Values will be quoted unless the value is a number.");
-    static final AllowableValue QUOTE_NONE = new AllowableValue("NONE", "Do Not Quote Values",
-        "Values will not be quoted. Instead, all special characters will be escaped using the configured escape character.");
-
-    static final PropertyDescriptor QUOTE_MODE = new PropertyDescriptor.Builder()
-        .name("Quote Mode")
-        .description("Specifies how fields should be quoted when they are written")
-        .expressionLanguageSupported(false)
-        .allowableValues(QUOTE_ALL, QUOTE_MINIMAL, QUOTE_NON_NUMERIC, QUOTE_NONE)
-        .defaultValue(QUOTE_MINIMAL.getValue())
-        .required(true)
-        .build();
-    static final PropertyDescriptor TRAILING_DELIMITER = new PropertyDescriptor.Builder()
-        .name("Include Trailing Delimiter")
-        .description("If true, a trailing delimiter will be added to each CSV Record that is written. If false, the trailing delimiter will be omitted.")
-        .expressionLanguageSupported(false)
-        .allowableValues("true", "false")
-        .defaultValue("false")
-        .required(true)
-        .build();
-    static final PropertyDescriptor RECORD_SEPARATOR = new PropertyDescriptor.Builder()
-        .name("Record Separator")
-        .description("Specifies the characters to use in order to separate CSV Records")
-        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
-        .expressionLanguageSupported(false)
-        .defaultValue("\\n")
-        .required(true)
-        .build();
-    static final PropertyDescriptor INCLUDE_HEADER_LINE = new PropertyDescriptor.Builder()
-        .name("Include Header Line")
-        .description("Specifies whether or not the CSV column names should be written out as the first line.")
-        .allowableValues("true", "false")
-        .defaultValue("true")
-        .required(true)
-        .build();
-
-    static CSVFormat createCSVFormat(final ConfigurationContext context) {
-        final String formatName = context.getProperty(CSV_FORMAT).getValue();
-        if (formatName.equalsIgnoreCase(CUSTOM.getValue())) {
-            return buildCustomFormat(context);
-        }
-        if (formatName.equalsIgnoreCase(RFC_4180.getValue())) {
-            return CSVFormat.RFC4180;
-        } else if (formatName.equalsIgnoreCase(EXCEL.getValue())) {
-            return CSVFormat.EXCEL;
-        } else if (formatName.equalsIgnoreCase(TDF.getValue())) {
-            return CSVFormat.TDF;
-        } else if (formatName.equalsIgnoreCase(MYSQL.getValue())) {
-            return CSVFormat.MYSQL;
-        } else if (formatName.equalsIgnoreCase(INFORMIX_UNLOAD.getValue())) {
-            return CSVFormat.INFORMIX_UNLOAD;
-        } else if (formatName.equalsIgnoreCase(INFORMIX_UNLOAD_CSV.getValue())) {
-            return CSVFormat.INFORMIX_UNLOAD_CSV;
-        } else {
-            return CSVFormat.DEFAULT;
-        }
-    }
-
-    private static char getUnescapedChar(final ConfigurationContext context, final PropertyDescriptor property) {
-        return StringEscapeUtils.unescapeJava(context.getProperty(property).getValue()).charAt(0);
-    }
-
-    private static char getChar(final ConfigurationContext context, final PropertyDescriptor property) {
-        return CSVUtils.unescape(context.getProperty(property).getValue()).charAt(0);
-    }
-
-    private static CSVFormat buildCustomFormat(final ConfigurationContext context) {
-        final char valueSeparator = getUnescapedChar(context, VALUE_SEPARATOR);
-        CSVFormat format = CSVFormat.newFormat(valueSeparator)
-            .withAllowMissingColumnNames()
-            .withIgnoreEmptyLines();
-
-        final PropertyValue skipHeaderPropertyValue = context.getProperty(FIRST_LINE_IS_HEADER);
-        if (skipHeaderPropertyValue.getValue() != null && skipHeaderPropertyValue.asBoolean()) {
-            format = format.withFirstRecordAsHeader();
-        }
-
-        format = format.withQuote(getChar(context, QUOTE_CHAR));
-        format = format.withEscape(getChar(context, ESCAPE_CHAR));
-        format = format.withTrim(context.getProperty(TRIM_FIELDS).asBoolean());
-
-        if (context.getProperty(COMMENT_MARKER).isSet()) {
-            format = format.withCommentMarker(getChar(context, COMMENT_MARKER));
-        }
-        if (context.getProperty(NULL_STRING).isSet()) {
-            format = format.withNullString(CSVUtils.unescape(context.getProperty(NULL_STRING).getValue()));
-        }
-
-        final PropertyValue quoteValue = context.getProperty(QUOTE_MODE);
-        if (quoteValue != null) {
-            final QuoteMode quoteMode = QuoteMode.valueOf(quoteValue.getValue());
-            format = format.withQuoteMode(quoteMode);
-        }
-
-        final PropertyValue trailingDelimiterValue = context.getProperty(TRAILING_DELIMITER);
-        if (trailingDelimiterValue != null) {
-            final boolean trailingDelimiter = trailingDelimiterValue.asBoolean();
-            format = format.withTrailingDelimiter(trailingDelimiter);
-        }
-
-        final PropertyValue recordSeparator = context.getProperty(RECORD_SEPARATOR);
-        if (recordSeparator != null) {
-            final String separator = unescape(recordSeparator.getValue());
-            format = format.withRecordSeparator(separator);
-        }
-
-        return format;
-    }
-
-
-    public static String unescape(final String input) {
-        if (input == null) {
-            return input;
-        }
-
-        return input.replace("\\t", "\t")
-            .replace("\\n", "\n")
-            .replace("\\r", "\r");
-    }
-}
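The CSVUtils class deleted here is removed in favour of the copy relocated to nifi-standard-record-utils by this commit, which lets the POI processor reuse the same property descriptors and format-building logic as the record readers and writers. For readers unfamiliar with how the resulting CSVFormat is consumed, the following is a minimal Commons CSV sketch, not part of the patch, with hard-coded values standing in for the property lookups that buildCustomFormat performs:

    import java.io.StringWriter;
    import org.apache.commons.csv.CSVFormat;
    import org.apache.commons.csv.CSVPrinter;

    // Illustrative only: roughly what a caller does with the format built above.
    public class CsvPrinterSketch {
        public static void main(String[] args) throws Exception {
            CSVFormat format = CSVFormat.newFormat('|')   // Value Separator
                    .withQuote('"')                       // Quote Character
                    .withEscape('\\')                     // Escape Character
                    .withRecordSeparator("\r\n");         // Record Separator (already unescaped)

            StringWriter out = new StringWriter();
            try (CSVPrinter printer = new CSVPrinter(out, format)) {
                printer.printRecord("Numbers", "Timestamps", "Money");
                printer.printRecord("1234.456", "1/1/17", "$   123.45");
            }
            System.out.print(out);   // two pipe-delimited records, each ending in CRLF
        }
    }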

http://git-wip-us.apache.org/repos/asf/nifi/blob/fd00df3d/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVValidators.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVValidators.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVValidators.java
deleted file mode 100644
index 5979407..0000000
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVValidators.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nifi.csv;
-
-import org.apache.commons.lang3.StringEscapeUtils;
-import org.apache.nifi.components.ValidationContext;
-import org.apache.nifi.components.ValidationResult;
-import org.apache.nifi.components.Validator;
-
-import java.util.HashSet;
-import java.util.Set;
-
-public class CSVValidators {
-
-    public static class SingleCharacterValidator implements Validator {
-        private static final Set<String> illegalChars = new HashSet<>();
-
-        static {
-            illegalChars.add("\r");
-            illegalChars.add("\n");
-        }
-
-        @Override
-        public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
-
-            if (input == null) {
-                return new ValidationResult.Builder()
-                        .input(input)
-                        .subject(subject)
-                        .valid(false)
-                        .explanation("Input is null for this property")
-                        .build();
-            }
-
-            final String unescaped = CSVUtils.unescape(input);
-            if (unescaped.length() != 1) {
-                return new ValidationResult.Builder()
-                        .input(input)
-                        .subject(subject)
-                        .valid(false)
-                        .explanation("Value must be exactly 1 character but was " + input.length() + " in length")
-                        .build();
-            }
-
-            if (illegalChars.contains(unescaped)) {
-                return new ValidationResult.Builder()
-                        .input(input)
-                        .subject(subject)
-                        .valid(false)
-                        .explanation(input + " is not a valid character for this property")
-                        .build();
-            }
-
-            return new ValidationResult.Builder()
-                    .input(input)
-                    .subject(subject)
-                    .valid(true)
-                    .build();
-        }
-
-    }
-
-    public static final Validator UNESCAPED_SINGLE_CHAR_VALIDATOR = new Validator() {
-        @Override
-        public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
-
-            if (input == null) {
-                return new ValidationResult.Builder()
-                        .input(input)
-                        .subject(subject)
-                        .valid(false)
-                        .explanation("Input is null for this property")
-                        .build();
-            }
-
-            String unescapeString = unescapeString(input);
-
-            return new ValidationResult.Builder()
-                    .subject(subject)
-                    .input(unescapeString)
-                    .explanation("Only non-null single characters are supported")
-                    .valid((unescapeString.length() == 1 && unescapeString.charAt(0) != 0) || context.isExpressionLanguagePresent(input))
-                    .build();
-        }
-
-        private String unescapeString(String input) {
-            if (input != null && input.length() > 1) {
-                input = StringEscapeUtils.unescapeJava(input);
-            }
-            return input;
-        }
-    };
-
-}
\ No newline at end of file
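CSVValidators moves for the same reason. Its two validators let separator-style properties accept escaped input such as "\t": the value is unescaped first and only then required to be a single character, with the SingleCharacterValidator variant additionally rejecting carriage return and newline. A hedged sketch of how a property wires one in; this mirrors the descriptors already defined in CSVUtils above rather than adding anything new:

    import org.apache.nifi.components.PropertyDescriptor;
    import org.apache.nifi.csv.CSVValidators;

    // Illustrative only: "," or the escaped "\t" pass validation,
    // multi-character values such as "ab" are rejected.
    public class SeparatorPropertySketch {
        static final PropertyDescriptor VALUE_SEPARATOR = new PropertyDescriptor.Builder()
                .name("Value Separator")
                .description("The character that separates fields in a CSV record")
                .addValidator(CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR)
                .defaultValue(",")
                .required(true)
                .build();
    }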

