carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chenliang...@apache.org
Subject [1/2] incubator-carbondata git commit: add comment option
Date Wed, 07 Sep 2016 12:08:28 GMT
Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 4aa22415d -> 2af709eb9


add comment option

fix code style

fixed testcase

renew some files


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/4cb3b5e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/4cb3b5e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/4cb3b5e4

Branch: refs/heads/master
Commit: 4cb3b5e42aab5e3c4739744c0844fda455a477f7
Parents: 4aa2241
Author: lion-x <xlion.pku@gmail.com>
Authored: Thu Sep 1 19:55:43 2016 +0800
Committer: chenliang613 <chenliang613@apache.org>
Committed: Wed Sep 7 20:07:19 2016 +0800

----------------------------------------------------------------------
 .../hadoop/test/util/StoreCreator.java          |  4 +++
 .../carbondata/spark/load/CarbonLoadModel.java  | 30 ++++++++++++++++++++
 .../carbondata/spark/load/CarbonLoaderUtil.java |  4 +++
 .../spark/util/GlobalDictionaryUtil.scala       |  9 ++++++
 .../org/apache/spark/sql/CarbonSqlParser.scala  |  2 +-
 .../execution/command/carbonTableSchema.scala   |  3 ++
 .../spark/src/test/resources/comment.csv        |  5 ++++
 .../dataload/TestLoadDataWithHiveSyntax.scala   | 20 +++++++++++--
 .../util/ExternalColumnDictionaryTestCase.scala |  1 +
 ...GlobalDictionaryUtilConcurrentTestCase.scala |  1 +
 .../util/GlobalDictionaryUtilTestCase.scala     |  1 +
 .../api/dataloader/DataLoadModel.java           | 13 +++++++++
 .../processing/csvreaderstep/CsvInput.java      |  2 ++
 .../processing/csvreaderstep/CsvInputMeta.java  | 30 ++++++++++++++++++++
 .../csvreaderstep/UnivocityCsvParser.java       |  3 ++
 .../csvreaderstep/UnivocityCsvParserVo.java     | 21 ++++++++++++++
 .../dataprocessor/DataProcessTaskStatus.java    | 14 +++++++++
 .../dataprocessor/IDataProcessStatus.java       |  4 +++
 .../graphgenerator/GraphGenerator.java          |  6 ++++
 19 files changed, 170 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java
----------------------------------------------------------------------
diff --git a/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java b/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java
index a48e6ad..5867160 100644
--- a/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java
+++ b/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java
@@ -353,6 +353,8 @@ public class StoreCreator {
     GraphGenerator.blockInfo.put("qwqwq", new BlockDetails[] { blockDetails });
     schmaModel.setBlocksID("qwqwq");
     schmaModel.setEscapeCharacter("\\");
+    schmaModel.setQuoteCharacter("\"");
+    schmaModel.setCommentCharacter("#");
     info.setDatabaseName(databaseName);
     info.setTableName(tableName);
 
@@ -460,6 +462,8 @@ public class StoreCreator {
     model.setBlocksID(schmaModel.getBlocksID());
     model.setFactTimeStamp(readCurrentTime());
     model.setEscapeCharacter(schmaModel.getEscapeCharacter());
+    model.setQuoteCharacter(schmaModel.getQuoteCharacter());
+    model.setCommentCharacter(schmaModel.getCommentCharacter());
     if (null != loadMetadataDetails && !loadMetadataDetails.isEmpty()) {
       model.setLoadNames(
           CarbonDataProcessorUtil.getLoadNameFromLoadMetaDataDetails(loadMetadataDetails));

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java
index c1a073d..68f3929 100644
--- a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java
+++ b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java
@@ -97,6 +97,16 @@ public class CarbonLoadModel implements Serializable {
   private String escapeChar;
 
   /**
+   * quote Char
+   */
+  private String quoteChar;
+
+  /**
+   * comment Char
+   */
+  private String commentChar;
+
+  /**
    * defines the string that should be treated as null while loadind data
    */
   private String serializationNullFormat;
@@ -322,6 +332,8 @@ public class CarbonLoadModel implements Serializable {
     copy.segmentId = segmentId;
     copy.serializationNullFormat = serializationNullFormat;
     copy.escapeChar = escapeChar;
+    copy.quoteChar = quoteChar;
+    copy.commentChar = commentChar;
     copy.maxColumns = maxColumns;
     return copy;
   }
@@ -361,6 +373,8 @@ public class CarbonLoadModel implements Serializable {
     copyObj.segmentId = segmentId;
     copyObj.serializationNullFormat = serializationNullFormat;
     copyObj.escapeChar = escapeChar;
+    copyObj.quoteChar = quoteChar;
+    copyObj.commentChar = commentChar;
     copyObj.maxColumns = maxColumns;
     return copyObj;
   }
@@ -531,6 +545,22 @@ public class CarbonLoadModel implements Serializable {
     this.serializationNullFormat = serializationNullFormat;
   }
 
+  public String getQuoteChar() {
+    return quoteChar;
+  }
+
+  public void setQuoteChar(String quoteChar) {
+    this.quoteChar = quoteChar;
+  }
+
+  public String getCommentChar() {
+    return commentChar;
+  }
+
+  public void setCommentChar(String commentChar) {
+    this.commentChar = commentChar;
+  }
+
   /**
    * @return
    */

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
index 246597c..a8302c0 100644
--- a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
+++ b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
@@ -126,6 +126,8 @@ public final class CarbonLoaderUtil {
     }
     model.setBlocksID(schmaModel.getBlocksID());
     model.setEscapeCharacter(schmaModel.getEscapeCharacter());
+    model.setQuoteCharacter(schmaModel.getQuoteCharacter());
+    model.setCommentCharacter(schmaModel.getCommentCharacter());
     model.setTaskNo(loadModel.getTaskNo());
     model.setFactTimeStamp(loadModel.getFactTimeStamp());
     model.setMaxColumns(loadModel.getMaxColumns());
@@ -180,6 +182,8 @@ public final class CarbonLoaderUtil {
 
     schmaModel.setBlocksID(loadModel.getBlocksID());
     schmaModel.setEscapeCharacter(loadModel.getEscapeChar());
+    schmaModel.setQuoteCharacter(loadModel.getQuoteChar());
+    schmaModel.setCommentCharacter(loadModel.getCommentChar());
     SchemaInfo info = new SchemaInfo();
 
     info.setDatabaseName(databaseName);

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala
index 3580810..02b70d0 100644
--- a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala
@@ -365,6 +365,15 @@ object GlobalDictionaryUtil extends Logging {
       .option("ignoreLeadingWhiteSpace", "false")
       .option("ignoreTrailingWhiteSpace", "false")
       .option("codec", "gzip")
+      .option("quote", {
+        if (StringUtils.isEmpty(carbonLoadModel.getQuoteChar)) {
+          "" + CSVWriter. DEFAULT_QUOTE_CHARACTER
+        }
+        else {
+          carbonLoadModel.getQuoteChar
+        }
+      })
+      .option("comment", carbonLoadModel.getCommentChar)
       .load(carbonLoadModel.getFactFilePath)
     df
   }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
index 3bc5f5c..f2df810 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
@@ -997,7 +997,7 @@ class CarbonSqlParser()
     val supportedOptions = Seq("DELIMITER", "QUOTECHAR", "FILEHEADER", "ESCAPECHAR", "MULTILINE",
       "COMPLEX_DELIMITER_LEVEL_1", "COMPLEX_DELIMITER_LEVEL_2", "COLUMNDICT",
       "SERIALIZATION_NULL_FORMAT",
-      "ALL_DICTIONARY_PATH", "MAXCOLUMNS"
+      "ALL_DICTIONARY_PATH", "MAXCOLUMNS", "COMMENTCHAR"
     )
     var isSupported = true
     val invalidOptions = StringBuilder.newBuilder

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index 1e06165..2047872 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -1091,6 +1091,7 @@ private[sql] case class LoadTable(
       val quoteChar = partionValues.getOrElse("quotechar", "\"")
       val fileHeader = partionValues.getOrElse("fileheader", "")
       val escapeChar = partionValues.getOrElse("escapechar", "\\")
+      val commentchar = partionValues.getOrElse("commentchar", "#")
       val columnDict = partionValues.getOrElse("columndict", null)
       val serializationNullFormat = partionValues.getOrElse("serialization_null_format", "\\N")
       val allDictionaryPath = partionValues.getOrElse("all_dictionary_path", "")
@@ -1107,6 +1108,8 @@ private[sql] case class LoadTable(
       val maxColumns = partionValues.getOrElse("maxcolumns", null)
       carbonLoadModel.setMaxColumns(maxColumns)
       carbonLoadModel.setEscapeChar(escapeChar)
+      carbonLoadModel.setQuoteChar(quoteChar)
+      carbonLoadModel.setCommentChar(commentchar)
       carbonLoadModel.setSerializationNullFormat("serialization_null_format" + "," +
         serializationNullFormat)
       if (delimiter.equalsIgnoreCase(complex_delimiter_level_1) ||

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/resources/comment.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/comment.csv b/integration/spark/src/test/resources/comment.csv
new file mode 100644
index 0000000..34ccb12
--- /dev/null
+++ b/integration/spark/src/test/resources/comment.csv
@@ -0,0 +1,5 @@
+.~carbon,.,~carbon,~carbon,~carbon,~carbon,~carbon,~carbon,~carbon
+,carbon,,carbon,,carbon,,carbon,,carbon,,carbon,,carbon,,carbon
+#?carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon
+?carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon
+".carbon,"carbon,"carbon,"carbon,"carbon,"carbon,"carbon,"carbon
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
index da64b39..0cc2b9f 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
@@ -385,7 +385,7 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
     sql(
       """
        LOAD DATA LOCAL INPATH './src/test/resources/datawithescapecharacter.csv' into table t3
-          options ('DELIMITER'=',', 'QUOTECHAR'='\"','ESCAPECHAR'='\')
+          options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='\')
       """
     )
     checkAnswer(sql("select count(*) from t3"), Seq(Row(21)))
@@ -406,7 +406,7 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
     sql(
       """
        LOAD DATA LOCAL INPATH './src/test/resources/datawithescapecharacter.csv' into table t3
-          options ('DELIMITER'=',', 'QUOTECHAR'='\"','ESCAPECHAR'='@')
+          options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='@')
       """
     )
     checkAnswer(sql("select count(*) from t3"), Seq(Row(21)))
@@ -590,6 +590,21 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
     checkAnswer(sql("select * from carbontable1"), sql("select * from hivetable1"))
   }
 
+  test("test data loading with comment option") {
+    sql("drop table if exists comment_test")
+    sql(
+      "create table comment_test(imei string, age int, task bigint, num double, level decimal(10," +
+        "3), productdate timestamp, mark int, name string) STORED BY 'org.apache.carbondata.format'"
+    )
+    sql(
+      "LOAD DATA local inpath './src/test/resources/comment.csv' INTO TABLE comment_test " +
+        "options('DELIMITER' = ',', 'QUOTECHAR' = '.', 'COMMENTCHAR' = '?','FILEHEADER'='imei,age,task,num,level,productdate,mark,name')"
+    )
+    checkAnswer(sql("select imei from comment_test"),Seq(Row("\".carbon"),Row("#?carbon"), Row(""),
+      Row("~carbon,")))
+  }
+
+
   override def afterAll {
     sql("drop table carbontable")
     sql("drop table hivetable")
@@ -597,5 +612,6 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
     sql("drop table if exists mixed_header_test")
     sql("drop table carbontable1")
     sql("drop table hivetable1")
+    sql("drop table if exists comment_test")
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala
index a2685b1..44ca85e 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala
@@ -140,6 +140,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll
     carbonLoadModel.setComplexDelimiterLevel1("\\$")
     carbonLoadModel.setComplexDelimiterLevel2("\\:")
     carbonLoadModel.setColDictFilePath(extColFilePath)
+    carbonLoadModel.setQuoteChar("\"");
     carbonLoadModel
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala
index 8468090..4108abe 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala
@@ -70,6 +70,7 @@ class GlobalDictionaryUtilConcurrentTestCase extends QueryTest with BeforeAndAft
     carbonLoadModel.setComplexDelimiterLevel1("\\$")
     carbonLoadModel.setComplexDelimiterLevel2("\\:")
     carbonLoadModel.setStorePath(relation.tableMeta.storePath)
+    carbonLoadModel.setQuoteChar("\"")
     carbonLoadModel
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala
index 05c2715..32beeee 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala
@@ -69,6 +69,7 @@ class GlobalDictionaryUtilTestCase extends QueryTest with BeforeAndAfterAll {
     carbonLoadModel.setComplexDelimiterLevel1("\\$")
     carbonLoadModel.setComplexDelimiterLevel2("\\:")
     carbonLoadModel.setStorePath(relation.tableMeta.storePath)
+    carbonLoadModel.setQuoteChar("\"")
     carbonLoadModel
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java b/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java
index 239457b..42a8382 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java
@@ -61,6 +61,10 @@ public class DataLoadModel {
 
   private String escapeCharacter;
 
+  private String quoteCharacter;
+
+  private String commentCharacter;
+
   private String maxColumns;
   /**
    * @return Returns the schemaInfo.
@@ -200,6 +204,15 @@ public class DataLoadModel {
     this.escapeCharacter = escapeCharacter;
   }
 
+  public String getQuoteCharacter() { return quoteCharacter; }
+
+  public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; }
+
+  public String getCommentCharacter() { return commentCharacter; }
+
+  public void setCommentCharacter(String commentCharacter) {
+    this.commentCharacter = commentCharacter;
+  }
   /**
    * @return
    */

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java
index c01a682..86ec915 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java
@@ -454,6 +454,8 @@ public class CsvInput extends BaseStep implements StepInterface {
     csvParserVo.setNumberOfColumns(meta.getInputFields().length);
     csvParserVo.setEscapeCharacter(meta.getEscapeCharacter());
     csvParserVo.setHeaderPresent(meta.isHeaderPresent());
+    csvParserVo.setQuoteCharacter(meta.getQuoteCharacter());
+    csvParserVo.setCommentCharacter(meta.getCommentCharacter());
     String maxColumns = meta.getMaxColumns();
     if(null != maxColumns) {
       csvParserVo.setMaxColumns(Integer.parseInt(maxColumns));

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java
index 2e0dece..c5b801a 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java
@@ -99,6 +99,10 @@ public class CsvInputMeta extends BaseStepMeta
 
   private String escapeCharacter;
 
+  private String quoteCharacter;
+
+  private String commentCharacter;
+
   private String maxColumns;
 
   public CsvInputMeta() {
@@ -121,6 +125,8 @@ public class CsvInputMeta extends BaseStepMeta
     blocksID = "";
     partitionID = "";
     escapeCharacter ="\\";
+    quoteCharacter = "\"";
+    commentCharacter = "#";
   }
 
   private void readData(Node stepnode) throws KettleXMLException {
@@ -157,6 +163,8 @@ public class CsvInputMeta extends BaseStepMeta
       blocksID = XMLHandler.getTagValue(stepnode, "blocksID");
       partitionID = XMLHandler.getTagValue(stepnode, "partitionID");
       escapeCharacter = XMLHandler.getTagValue(stepnode, "escapeCharacter");
+      quoteCharacter = XMLHandler.getTagValue(stepnode, "quoteCharacter");
+      commentCharacter = XMLHandler.getTagValue(stepnode, "commentCharacter");
       maxColumns = XMLHandler.getTagValue(stepnode, "maxColumns");
       Node fields = XMLHandler.getSubNode(stepnode, getXmlCode("FIELDS"));
       int nrfields = XMLHandler.countNodes(fields, getXmlCode("FIELD"));
@@ -220,6 +228,8 @@ public class CsvInputMeta extends BaseStepMeta
     retval.append("    ").append(XMLHandler.addTagValue("blocksID", blocksID));
     retval.append("    ").append(XMLHandler.addTagValue("partitionID", partitionID));
     retval.append("    ").append(XMLHandler.addTagValue("escapeCharacter", escapeCharacter));
+    retval.append("    ").append(XMLHandler.addTagValue("quoteCharacter", quoteCharacter));
+    retval.append("    ").append(XMLHandler.addTagValue("commentCharacter", commentCharacter));
     retval.append("    ").append(XMLHandler.addTagValue("maxColumns", maxColumns));
     retval.append("    ").append(XMLHandler.openTag(getXmlCode("FIELDS"))).append(Const.CR);
     for (int i = 0; i < inputFields.length; i++) {
@@ -273,6 +283,8 @@ public class CsvInputMeta extends BaseStepMeta
       blocksID = rep.getStepAttributeString(idStep, getRepCode("blocksID"));
       partitionID = rep.getStepAttributeString(idStep, getRepCode("partitionID"));
       escapeCharacter = rep.getStepAttributeString(idStep, getRepCode("escapeCharacter"));
+      quoteCharacter = rep.getStepAttributeString(idStep, getRepCode("quoteCharacter"));
+      commentCharacter = rep.getStepAttributeString(idStep, getRepCode("commentCharacter"));
       maxColumns = rep.getStepAttributeString(idStep, getRepCode("maxColumns"));
       int nrfields = rep.countNrStepAttributes(idStep, getRepCode("FIELD_NAME"));
 
@@ -329,6 +341,10 @@ public class CsvInputMeta extends BaseStepMeta
       rep.saveStepAttribute(idTransformation, idStep, getRepCode("partitionID"), partitionID);
       rep.saveStepAttribute(idTransformation, idStep, getRepCode("escapeCharacter"),
           escapeCharacter);
+      rep.saveStepAttribute(idTransformation, idStep, getRepCode("quoteCharacter"),
+          quoteCharacter);
+      rep.saveStepAttribute(idTransformation, idStep, getRepCode("commentCharacter"),
+          commentCharacter);
       rep.saveStepAttribute(idTransformation, idStep, getRepCode("maxColumns"),
           maxColumns);
       for (int i = 0; i < inputFields.length; i++) {
@@ -623,6 +639,16 @@ public class CsvInputMeta extends BaseStepMeta
     this.escapeCharacter = escapeCharacter;
   }
 
+  public String getQuoteCharacter() { return quoteCharacter; }
+
+  public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; }
+
+  public String getCommentCharacter() { return commentCharacter; }
+
+  public void setCommentCharacter(String commentCharacter) {
+    this.commentCharacter = commentCharacter;
+  }
+
   public String getFileType() {
     return "CSV";
   }
@@ -828,6 +854,10 @@ public class CsvInputMeta extends BaseStepMeta
           partitionID = (String) entry.getValue();
         } else if ("escapeCharacter".equals(attributeKey)) {
           escapeCharacter = (String) entry.getValue();
+        } else if ("quoteCharacter".equals(attributeKey)) {
+          quoteCharacter = (String) entry.getValue();
+        } else if ("commentCharacter".equals(attributeKey)) {
+          commentCharacter = (String) entry.getValue();
         } else {
           throw new RuntimeException(
               "Unhandled metadata injection of attribute: " + attr.toString() + " - " + attr

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
index 49f17dc..89eec54 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
@@ -93,6 +93,7 @@ public class UnivocityCsvParser {
   public void initialize() throws IOException {
     CsvParserSettings parserSettings = new CsvParserSettings();
     parserSettings.getFormat().setDelimiter(csvParserVo.getDelimiter().charAt(0));
+    parserSettings.getFormat().setComment(csvParserVo.getCommentCharacter().charAt(0));
     parserSettings.setLineSeparatorDetectionEnabled(true);
     parserSettings.setMaxColumns(
         getMaxColumnsForParsing(csvParserVo.getNumberOfColumns(), csvParserVo.getMaxColumns()));
@@ -100,6 +101,8 @@ public class UnivocityCsvParser {
     parserSettings.setIgnoreLeadingWhitespaces(false);
     parserSettings.setIgnoreTrailingWhitespaces(false);
     parserSettings.setSkipEmptyLines(false);
+    parserSettings.getFormat().setQuote(null == csvParserVo.getQuoteCharacter() ?
+        '\"':csvParserVo.getQuoteCharacter().charAt(0));
     parserSettings.getFormat().setQuoteEscape(null == csvParserVo.getEscapeCharacter() ?
         '\\' :
         csvParserVo.getEscapeCharacter().charAt(0));

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java
index 623cac3..5383309 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java
@@ -71,6 +71,16 @@ public class UnivocityCsvParserVo {
   private String escapeCharacter;
 
   /**
+   * quote character;
+   */
+  private String quoteCharacter;
+
+  /**
+   * comment character;
+   */
+  private String commentCharacter;
+
+  /**
    * max number of columns configured by user to be parsed in a row
    */
   private int maxColumns;
@@ -187,6 +197,17 @@ public class UnivocityCsvParserVo {
     this.escapeCharacter = escapeCharacter;
   }
 
+  public String getQuoteCharacter() { return quoteCharacter; }
+
+  public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; }
+
+  public String getCommentCharacter() { return commentCharacter; }
+
+  public void setCommentCharacter(String commentCharacter) {
+    this.commentCharacter = commentCharacter;
+  }
+
+
   /**
    * @return
    */

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
index bc09f89..a0fb157 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
@@ -87,6 +87,10 @@ public class DataProcessTaskStatus implements IDataProcessStatus, Serializable {
 
   private String escapeCharacter;
 
+  private String quoteCharacter;
+
+  private String commentCharacter;
+
   public DataProcessTaskStatus(String databaseName, String tableName) {
     this.databaseName = databaseName;
     this.tableName = tableName;
@@ -283,4 +287,14 @@ public class DataProcessTaskStatus implements IDataProcessStatus, Serializable {
   public void setEscapeCharacter(String escapeCharacter) {
     this.escapeCharacter = escapeCharacter;
   }
+
+  public String getQuoteCharacter() { return quoteCharacter; }
+
+  public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; }
+
+  public String getCommentCharacter() { return commentCharacter; }
+
+  public void setCommentCharacter(String commentCharacter) {
+    this.commentCharacter = commentCharacter;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java
index 22cada5..56dfe00 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java
@@ -189,4 +189,8 @@ public interface IDataProcessStatus {
   String getBlocksID();
 
   String getEscapeCharacter();
+
+  String getQuoteCharacter();
+
+  String getCommentCharacter();
 }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java b/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java
index f55c247..b10841f 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java
@@ -185,6 +185,8 @@ public class GraphGenerator {
   private String factStoreLocation;
   private String blocksID;
   private String escapeCharacter;
+  private String quoteCharacter;
+  private String commentCharacter;
   /**
    * task id, each spark task has a unique id
    */
@@ -219,6 +221,8 @@ public class GraphGenerator {
     this.isColumnar = Boolean.parseBoolean(CarbonCommonConstants.IS_COLUMNAR_STORAGE_DEFAULTVALUE);
     this.blocksID = dataLoadModel.getBlocksID();
     this.taskNo = dataLoadModel.getTaskNo();
+    this.quoteCharacter = dataLoadModel.getQuoteCharacter();
+    this.commentCharacter = dataLoadModel.getCommentCharacter();
     this.factTimeStamp = dataLoadModel.getFactTimeStamp();
     this.segmentId = segmentId;
     this.escapeCharacter = dataLoadModel.getEscapeCharacter();
@@ -450,6 +454,8 @@ public class GraphGenerator {
     csvInputMeta.setBlocksID(this.blocksID);
     csvInputMeta.setPartitionID(this.partitionID);
     csvInputMeta.setEscapeCharacter(this.escapeCharacter);
+    csvInputMeta.setQuoteCharacter(this.quoteCharacter);
+    csvInputMeta.setCommentCharacter(this.commentCharacter);
     csvDataStep.setDraw(true);
     csvDataStep.setDescription("Read raw data from " + GraphGeneratorConstants.CSV_INPUT);
 


Mime
View raw message