carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chenliang...@apache.org
Subject [19/50] [abbrv] incubator-carbondata git commit: [Bug]skip escape char in kettle and fix quote in middle of value (#751)
Date Thu, 30 Jun 2016 17:42:06 GMT
[Bug]skip escape char in kettle and fix quote in middle of value (#751)

In the current flow, the escape character is not skipped in kettle, while dictionary
generation does skip the escape character. Also, if the value does not start with a quote,
the escape character is not skipped.
For example: "ab\c" -> "abc", a"b\c" -> a"b\c"

This PR also fixes values like ab"c"d, which contain double quotes in the middle of the value.

As suggested, we need to use the same parser in phase 1 (global dictionary) and phase 2
(data loading): the univocity parser. Then defects caused by the two phases using different
parsers will no longer occur.

Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5045d739
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5045d739
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5045d739

Branch: refs/heads/master
Commit: 5045d73942204eddf801e0f944a1f6897f61b12e
Parents: f16eb54
Author: linyixin <linyixin@huawei.com>
Authored: Mon Jun 27 17:45:59 2016 +0800
Committer: Vimal-Das <vimaldas.kammath@gmail.com>
Committed: Mon Jun 27 02:45:59 2016 -0700

----------------------------------------------------------------------
 .../hadoop/test/util/StoreCreator.java          |  3 +-
 .../carbondata/spark/load/CarbonLoadModel.java  |  2 +
 .../carbondata/spark/load/CarbonLoaderUtil.java |  2 +
 .../execution/command/carbonTableSchema.scala   |  2 +-
 .../test/resources/datawithescapecharacter.csv  | 22 ++++++
 .../test/resources/datawithspecialcharacter.csv | 37 +++++++++
 .../dataload/TestLoadDataWithHiveSyntax.scala   | 55 +++++++++++++-
 .../api/dataloader/DataLoadModel.java           |  9 +++
 .../processing/csvload/DataGraphExecuter.java   |  8 ++
 .../csvreaderstep/BlockDataHandler.java         | 79 +++++++++++++-------
 .../processing/csvreaderstep/CsvInput.java      |  3 +-
 .../processing/csvreaderstep/CsvInputData.java  |  1 +
 .../processing/csvreaderstep/CsvInputMeta.java  | 21 +++++-
 .../dataprocessor/DataProcessTaskStatus.java    | 10 +++
 .../dataprocessor/IDataProcessStatus.java       |  2 +
 .../graphgenerator/GraphGenerator.java          |  3 +
 16 files changed, 224 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/hadoop/src/test/java/org/carbondata/hadoop/test/util/StoreCreator.java
----------------------------------------------------------------------
diff --git a/hadoop/src/test/java/org/carbondata/hadoop/test/util/StoreCreator.java b/hadoop/src/test/java/org/carbondata/hadoop/test/util/StoreCreator.java
index 1190a9d..556fc8d 100644
--- a/hadoop/src/test/java/org/carbondata/hadoop/test/util/StoreCreator.java
+++ b/hadoop/src/test/java/org/carbondata/hadoop/test/util/StoreCreator.java
@@ -365,7 +365,7 @@ public class StoreCreator {
         0, new File(loadModel.getFactFilePath()).length());
     GraphGenerator.blockInfo.put("qwqwq", new BlockDetails[] { blockDetails });
     schmaModel.setBlocksID("qwqwq");
-
+    schmaModel.setEscapeCharacter("\\");
     info.setSchemaName(schemaName);
     info.setCubeName(cubeName);
 
@@ -473,6 +473,7 @@ public class StoreCreator {
     model.setTaskNo("1");
     model.setBlocksID(schmaModel.getBlocksID());
     model.setFactTimeStamp(readCurrentTime());
+    model.setEscapeCharacter(schmaModel.getEscapeCharacter());
     if (null != loadMetadataDetails && !loadMetadataDetails.isEmpty()) {
       model.setLoadNames(
           CarbonDataProcessorUtil.getLoadNameFromLoadMetaDataDetails(loadMetadataDetails));

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoadModel.java
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoadModel.java
b/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoadModel.java
index c422c9f..7125818 100644
--- a/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoadModel.java
+++ b/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoadModel.java
@@ -264,6 +264,7 @@ public class CarbonLoadModel implements Serializable {
     copy.taskNo = taskNo;
     copy.factTimeStamp = factTimeStamp;
     copy.segmentId = segmentId;
+    copy.escapeChar = escapeChar;
     return copy;
   }
 
@@ -300,6 +301,7 @@ public class CarbonLoadModel implements Serializable {
     copyObj.taskNo = taskNo;
     copyObj.factTimeStamp = factTimeStamp;
     copyObj.segmentId = segmentId;
+    copyObj.escapeChar = escapeChar;
     return copyObj;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoaderUtil.java
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoaderUtil.java
b/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoaderUtil.java
index 0fb2621..c5eb971 100644
--- a/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoaderUtil.java
+++ b/integration/spark/src/main/java/org/carbondata/spark/load/CarbonLoaderUtil.java
@@ -104,6 +104,7 @@ public final class CarbonLoaderUtil {
           .getModificationOrDeletionTimesFromLoadMetadataDetails(loadMetadataDetails));
     }
     model.setBlocksID(schmaModel.getBlocksID());
+    model.setEscapeCharacter(schmaModel.getEscapeCharacter());
     model.setTaskNo(loadModel.getTaskNo());
     model.setFactTimeStamp(loadModel.getFactTimeStamp());
     boolean hdfsReadMode =
@@ -159,6 +160,7 @@ public final class CarbonLoaderUtil {
     }
 
     schmaModel.setBlocksID(loadModel.getBlocksID());
+    schmaModel.setEscapeCharacter(loadModel.getEscapeChar());
     SchemaInfo info = new SchemaInfo();
 
     info.setSchemaName(databaseName);

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index 99cb9ca..a65b7b7 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -1496,7 +1496,7 @@ private[sql] case class LoadCube(
       val delimiter = partionValues.getOrElse("delimiter", ",")
       val quoteChar = partionValues.getOrElse("quotechar", "\"")
       val fileHeader = partionValues.getOrElse("fileheader", "")
-      val escapeChar = partionValues.getOrElse("escapechar", "")
+      val escapeChar = partionValues.getOrElse("escapechar", "\\")
       val complex_delimiter_level_1 = partionValues.getOrElse("complex_delimiter_level_1",
"\\$")
       val complex_delimiter_level_2 = partionValues.getOrElse("complex_delimiter_level_2",
"\\:")
       val multiLine = partionValues.getOrElse("multiline", "false").trim.toLowerCase match
{

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/integration/spark/src/test/resources/datawithescapecharacter.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/datawithescapecharacter.csv b/integration/spark/src/test/resources/datawithescapecharacter.csv
new file mode 100644
index 0000000..9bda54d
--- /dev/null
+++ b/integration/spark/src/test/resources/datawithescapecharacter.csv
@@ -0,0 +1,22 @@
+imei,specialchar
+1AA1,hash#124
+1AA2,space 125
+1AA3,ampersand&&hi
+1AA4,escape\\esc
+1AA44,"escape\esc"
+1AA5,not!hi
+1AA6,braces(hi)
+1AA7,percentage%hi
+1AA8,Tilde~~
+1AA9,dollar$hi
+1AA10,star***hi
+1AA11,colon:hi
+1AA12,semi;colon
+1AA13,quote'1'22
+1AA14,underscore_hi
+1AA15,equals=hi
+1AA16,plus+hi
+1232,"ayush@b.com"
+12323,"ayush@@b.com"
+12345,"西安\咸阳"
+12346,"西安\\咸阳"

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/integration/spark/src/test/resources/datawithspecialcharacter.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/datawithspecialcharacter.csv b/integration/spark/src/test/resources/datawithspecialcharacter.csv
new file mode 100644
index 0000000..f807120
--- /dev/null
+++ b/integration/spark/src/test/resources/datawithspecialcharacter.csv
@@ -0,0 +1,37 @@
+imei,specialchar
+1AA0,double"specialchar"quote
+1AA1,hash#124
+1AA2,space 125
+1AA3,ampersand&&hi
+1AA4,escape\esc
+1AA5,not!hi
+1AA6,braces(hi)
+1AA7,percentage%hi
+1AA8,Tilde~~
+1AA9,dollar$hi
+1AA10,star***hi
+1AA11,colon:hi
+1AA12,semi;colon
+1AA13,quote'1'22
+1AA14,underscore_hi
+1AA15,equals=hi
+1AA16,plus+hi
+1AA17,minus-hi
+1AA18,combination ~!@#$%^&*()_+| CA
+1AA19,https://www.google.co.in/?gws_rd=cr&ei=BF7rUqOoEc6GrgeIooHQDQ#q=India+state BC
+1AA20,LonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmembernameLonglevelmember
+1AA21,char123234Number
+1AA22,\\  esc  !@~##%%&**(*&((*()()*  ""  some thing ""  ' DF
+1AA23,select * from sale_fact
+1AA24,pipe|hi
+1AA25,Curly{braces}hi
+1AA26,braces[hi]
+1AA27,question?hi
+1AA28,lessthan< hi
+1AA29,morethan>hi
+1AA30,dot.hi
+1AA31,comma,hi
+1AA32,escapecomma\,hi
+1AA33,fslash/hi
+1AA34,a@b
+1AA35,\\n

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
index 6765b2a..a75f020 100644
--- a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
+++ b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
@@ -202,7 +202,7 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll
{
     sql("drop table LowErcasEcube")
   }
   
-  test("test carbon table data loading using escape char") {
+  test("test carbon table data loading using escape char 1") {
     sql("DROP TABLE IF EXISTS t3")
 
     sql("""
@@ -222,7 +222,58 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll
{
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
     sql("DROP TABLE IF EXISTS t3")
   }
-  
+
+  test("test carbon table data loading using escape char 2") {
+    sql("DROP TABLE IF EXISTS t3")
+
+    sql("""
+         CREATE TABLE t3(imei string,specialchar string)
+         STORED BY 'org.apache.carbondata.format'
+        """)
+
+    sql("""
+       LOAD DATA LOCAL INPATH './src/test/resources/datawithescapecharacter.csv' into table
t3
+          options ('DELIMITER'=',', 'QUOTECHAR'='\"','ESCAPECHAR'='\')
+        """)
+    checkAnswer(sql("select count(*) from t3"), Seq(Row(21)))
+    checkAnswer(sql("select specialchar from t3 where imei = '1AA44'"),Seq(Row("escapeesc")))
+    sql("DROP TABLE IF EXISTS t3")
+  }
+
+  test("test carbon table data loading using escape char 3") {
+    sql("DROP TABLE IF EXISTS t3")
+
+    sql("""
+         CREATE TABLE t3(imei string,specialchar string)
+         STORED BY 'org.apache.carbondata.format'
+        """)
+
+    sql("""
+       LOAD DATA LOCAL INPATH './src/test/resources/datawithescapecharacter.csv' into table
t3
+          options ('DELIMITER'=',', 'QUOTECHAR'='\"','ESCAPECHAR'='@')
+        """)
+    checkAnswer(sql("select count(*) from t3"), Seq(Row(21)))
+    checkAnswer(sql("select specialchar from t3 where imei in ('1232','12323')"),Seq(Row
+    ("ayush@b.com"),Row("ayushb.com")))
+    sql("DROP TABLE IF EXISTS t3")
+  }
+  test("test carbon table data loading with special character") {
+    sql("DROP TABLE IF EXISTS t3")
+
+    sql("""
+         CREATE TABLE t3(imei string,specialchar string)
+         STORED BY 'org.apache.carbondata.format'
+        """)
+
+    sql("""
+       LOAD DATA LOCAL INPATH './src/test/resources/datawithspecialcharacter.csv' into table
t3
+          options ('DELIMITER'=',', 'QUOTECHAR'='\"')
+       """)
+    checkAnswer(sql("select count(*) from t3"), Seq(Row(36)))
+    sql("DROP TABLE IF EXISTS t3")
+  }
+
+
   override def afterAll {
     sql("drop table carbontable")
     sql("drop table hivetable")

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/api/dataloader/DataLoadModel.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/api/dataloader/DataLoadModel.java
b/processing/src/main/java/org/carbondata/processing/api/dataloader/DataLoadModel.java
index 0236c80..dd545c9 100644
--- a/processing/src/main/java/org/carbondata/processing/api/dataloader/DataLoadModel.java
+++ b/processing/src/main/java/org/carbondata/processing/api/dataloader/DataLoadModel.java
@@ -59,6 +59,7 @@ public class DataLoadModel {
    */
   private String factTimeStamp;
 
+  private String escapeCharacter;
   /**
    * @return Returns the schemaInfo.
    */
@@ -188,5 +189,13 @@ public class DataLoadModel {
   public void setFactTimeStamp(String factTimeStamp) {
     this.factTimeStamp = factTimeStamp;
   }
+
+  public String getEscapeCharacter() {
+    return escapeCharacter;
+  }
+
+  public void setEscapeCharacter(String escapeCharacter) {
+    this.escapeCharacter = escapeCharacter;
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
b/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
index 6526e90..ca21ec1 100644
--- a/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
+++ b/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
@@ -327,6 +327,8 @@ public class DataGraphExecuter {
                 .getTextInputFiles(model.getCsvHeader(), builder, measuresInCSVFile, ",");
             ((CsvInputMeta) step.getStepMetaInterface()).setInputFields(inputParams);
             ((CsvInputMeta) step.getStepMetaInterface()).setDelimiter(model.getCsvDelimiter());
+            ((CsvInputMeta) step.getStepMetaInterface())
+              .setEscapeCharacter(model.getEscapeCharacter());
             ((CsvInputMeta) step.getStepMetaInterface()).setHeaderPresent(false);
 
           } else if (model.getFilesToProcess().size() > 0) {
@@ -337,6 +339,8 @@ public class DataGraphExecuter {
                     model.getCsvDelimiter());
             ((CsvInputMeta) step.getStepMetaInterface()).setInputFields(inputFields);
             ((CsvInputMeta) step.getStepMetaInterface()).setDelimiter(model.getCsvDelimiter());
+            ((CsvInputMeta) step.getStepMetaInterface())
+              .setEscapeCharacter(model.getEscapeCharacter());
             ((CsvInputMeta) step.getStepMetaInterface()).setHeaderPresent(true);
           }
         }
@@ -415,6 +419,8 @@ public class DataGraphExecuter {
                 .getTextInputFiles(model.getCsvHeader(), builder, measuresInCSVFile, ",");
             ((CsvInputMeta) step.getStepMetaInterface()).setInputFields(inputFields);
             ((CsvInputMeta) step.getStepMetaInterface()).setDelimiter(model.getCsvDelimiter());
+            ((CsvInputMeta) step.getStepMetaInterface())
+                .setEscapeCharacter(model.getEscapeCharacter());
             ((CsvInputMeta) step.getStepMetaInterface()).setHeaderPresent(false);
 
           } else if (model.getFilesToProcess().size() > 0) {
@@ -425,6 +431,8 @@ public class DataGraphExecuter {
                     model.getCsvDelimiter());
             ((CsvInputMeta) step.getStepMetaInterface()).setInputFields(inputFields);
             ((CsvInputMeta) step.getStepMetaInterface()).setDelimiter(model.getCsvDelimiter());
+            ((CsvInputMeta) step.getStepMetaInterface())
+              .setEscapeCharacter(model.getEscapeCharacter());
             ((CsvInputMeta) step.getStepMetaInterface()).setHeaderPresent(true);
           }
         }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/csvreaderstep/BlockDataHandler.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvreaderstep/BlockDataHandler.java
b/processing/src/main/java/org/carbondata/processing/csvreaderstep/BlockDataHandler.java
index 29b4a54..4a56b85 100644
--- a/processing/src/main/java/org/carbondata/processing/csvreaderstep/BlockDataHandler.java
+++ b/processing/src/main/java/org/carbondata/processing/csvreaderstep/BlockDataHandler.java
@@ -136,29 +136,24 @@ public class BlockDataHandler {
     return false;
   }
 
-  /**
-   * <pre>
-   * [abcd "" defg] --> [abcd " defg]
-   * [""""] --> [""]
-   * [""] --> ["]
-   * </pre>
-   *
-   * @return the byte array with escaped enclosures escaped.
-   */
-  public byte[] removeEscapedEnclosures(byte[] field, int nrEnclosuresFound) {
-    byte[] result = new byte[field.length - nrEnclosuresFound];
+  public byte[] removeEscapeChar(byte[] field, byte[] escapeChar) {
+    byte[] result = new byte[field.length];
     int resultIndex = 0;
     for (int i = 0; i < field.length; i++) {
-      if (field[i] == data.enclosure[0]) {
-        if (!(i + 1 < field.length && field[i + 1] == data.enclosure[0])) {
-          // Not an escaped enclosure...
+      if (field[i] != escapeChar[0]) {
+        result[resultIndex++] = field[i];
+      }
+      if (i + 1 < field.length) {
+        if (field[i] == escapeChar[0] && field[i + 1] == escapeChar[0]) {
           result[resultIndex++] = field[i];
+          i++;
         }
-      } else {
-        result[resultIndex++] = field[i];
       }
     }
-    return result;
+
+    byte[] finalResult = new byte[resultIndex];
+    System.arraycopy(result,0,finalResult,0,resultIndex);
+    return finalResult;
   }
 
   protected boolean openFile(StepMetaInterface smi, StepDataInterface sdi, TransMeta trans,
@@ -166,6 +161,7 @@ public class BlockDataHandler {
     try {
       this.meta = (CsvInputMeta) smi;
       this.data = (CsvInputData) sdi;
+      this.data.preferredBufferSize = Integer.parseInt(this.meta.getBufferSize());
       this.transMeta = trans;
       // Close the previous file...
       if (this.bufferedInputStream != null) {
@@ -300,7 +296,8 @@ public class BlockDataHandler {
         //
         boolean delimiterFound = false;
         boolean enclosureFound = false;
-        int escapedEnclosureFound = 0;
+        boolean quoteAfterDelimiter = false;
+        boolean quoteBeforeDelimiterOrCrLf = false;
         while (!delimiterFound) {
           // If we find the first char, we might find others as well ;-)
           // Single byte delimiters only for now.
@@ -379,8 +376,11 @@ public class BlockDataHandler {
           //
           else if (data.enclosure != null && data.enclosureMatcher
               .matchesPattern(this.byteBuffer, this.endBuffer, data.enclosure)) {
-
+            if(this.startBuffer == this.endBuffer){
+              quoteAfterDelimiter = true;
+            }
             enclosureFound = true;
+            boolean outOfEnclosureFlag = false;
             boolean keepGoing;
             do {
               if (this.increaseEndBuffer()) {
@@ -405,6 +405,7 @@ public class BlockDataHandler {
                   .matchesPattern(this.byteBuffer, this.endBuffer,
                       data.enclosure);
               if (!keepGoing) {
+                outOfEnclosureFlag = !outOfEnclosureFlag;
                 // We found an enclosure character.
                 // Read another byte...
                 if (this.increaseEndBuffer()) {
@@ -420,7 +421,7 @@ public class BlockDataHandler {
                     .matchesPattern(this.byteBuffer, this.endBuffer,
                         data.enclosure);
                 if (keepGoing) {
-                  escapedEnclosureFound++;
+                  outOfEnclosureFlag = !outOfEnclosureFlag;
                 } else {
                   /**
                    * <pre>
@@ -444,6 +445,26 @@ public class BlockDataHandler {
                 }
 
               }
+              if (!keepGoing) {
+                if (data.enclosureMatcher
+                    .matchesPattern(this.byteBuffer, this.endBuffer - 1, data.enclosure))
{
+                  quoteBeforeDelimiterOrCrLf = true;
+                }
+              }
+              if (outOfEnclosureFlag) {
+                keepGoing = !(data.delimiterMatcher
+                  .matchesPattern(this.byteBuffer, this.endBuffer,
+                    data.delimiter) || data.crLfMatcher
+                  .isReturn(this.byteBuffer, this.endBuffer)
+                  || data.crLfMatcher
+                  .isLineFeed(this.byteBuffer, this.endBuffer));
+              }
+
+              if (quoteBeforeDelimiterOrCrLf && quoteAfterDelimiter) {
+                enclosureFound = true;
+              } else {
+                enclosureFound = false;
+              }
             } while (keepGoing);
 
             // Did we reach the end of the buffer?
@@ -480,15 +501,14 @@ public class BlockDataHandler {
         // data.byteBuffer[data.startBuffer]
         //
         int length =
-            calculateFieldLength(newLineFound, newLines, enclosureFound, endOfBuffer);
+            calculateFieldLength(newLineFound, newLines, enclosureFound, endOfBuffer,
+              quoteAfterDelimiter, quoteBeforeDelimiterOrCrLf);
 
         byte[] field = new byte[length];
         System.arraycopy(this.byteBuffer, this.startBuffer, field, 0, length);
 
-        // Did we have any escaped characters in there?
-        //
-        if (escapedEnclosureFound > 0) {
-          field = this.removeEscapedEnclosures(field, escapedEnclosureFound);
+        if(quoteAfterDelimiter && quoteBeforeDelimiterOrCrLf){
+          field = removeEscapeChar(field,data.escapeCharacter);
         }
 
         if (doConversions) {
@@ -619,7 +639,7 @@ public class BlockDataHandler {
   }
 
   private int calculateFieldLength(boolean newLineFound, int newLines, boolean enclosureFound,
-      boolean endOfBuffer) {
+      boolean endOfBuffer, boolean quoteAfterDelimeter, boolean quoteBeforeDelimeterOrCrLf)
{
 
     int length = this.endBuffer - this.startBuffer;
     if (newLineFound) {
@@ -631,6 +651,7 @@ public class BlockDataHandler {
         this.startBuffer++; // offset for the enclosure in last field before EOF
       }
     }
+
     if (enclosureFound) {
       this.startBuffer++;
       length -= 2;
@@ -638,6 +659,12 @@ public class BlockDataHandler {
         length = 0;
       }
     }
+
+    if (!endOfBuffer && (quoteAfterDelimeter && !quoteBeforeDelimeterOrCrLf))
{
+      this.startBuffer++;
+      length--;
+    }
+
     if (length <= 0) {
       length = 0;
     }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInput.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInput.java
b/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInput.java
index 76d5716..42f20cc 100644
--- a/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInput.java
+++ b/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInput.java
@@ -522,7 +522,8 @@ public class CsvInput extends BaseStep implements StepInterface {
       try {
         data.delimiter = data.encodingType
             .getBytes(environmentSubstitute(meta.getDelimiter()), meta.getEncoding());
-
+        data.escapeCharacter = data.encodingType
+          .getBytes(environmentSubstitute(meta.getEscapeCharacter()), meta.getEncoding());
         if (Const.isEmpty(meta.getEnclosure())) {
           data.enclosure = null;
         } else {

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputData.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputData.java
b/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputData.java
index 77eaf1b..f97afed 100644
--- a/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputData.java
+++ b/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputData.java
@@ -30,6 +30,7 @@ public class CsvInputData extends BaseStepData implements StepDataInterface
{
 
   public byte[] delimiter;
   public byte[] enclosure;
+  public byte[] escapeCharacter;
   public int preferredBufferSize;
   public int totalNumberOfSteps;
   public boolean parallel;

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputMeta.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputMeta.java
b/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputMeta.java
index 6f895b1..0e4c2e7 100644
--- a/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputMeta.java
+++ b/processing/src/main/java/org/carbondata/processing/csvreaderstep/CsvInputMeta.java
@@ -97,6 +97,8 @@ public class CsvInputMeta extends BaseStepMeta
 
   private String blocksID;
 
+  private String escapeCharacter;
+
   public CsvInputMeta() {
     super(); // allocate BaseStepMeta
     allocate(0);
@@ -116,6 +118,7 @@ public class CsvInputMeta extends BaseStepMeta
     bufferSize = "50000";
     currentRestructNumber = -1;
     blocksID = "";
+    escapeCharacter ="\\";
   }
 
   private void readData(Node stepnode) throws KettleXMLException {
@@ -152,7 +155,7 @@ public class CsvInputMeta extends BaseStepMeta
       currentRestructNumber =
           Integer.parseInt(XMLHandler.getTagValue(stepnode, "currentRestructNumber"));
       blocksID = XMLHandler.getTagValue(stepnode, "blocksID");
-
+      escapeCharacter = XMLHandler.getTagValue(stepnode, "escapeCharacter");
       Node fields = XMLHandler.getSubNode(stepnode, getXmlCode("FIELDS"));
       int nrfields = XMLHandler.countNodes(fields, getXmlCode("FIELD"));
 
@@ -169,7 +172,8 @@ public class CsvInputMeta extends BaseStepMeta
         inputFields[i].setFormat(XMLHandler.getTagValue(fnode, getXmlCode("FIELD_FORMAT")));
         inputFields[i]
             .setCurrencySymbol(XMLHandler.getTagValue(fnode, getXmlCode("FIELD_CURRENCY")));
-        inputFields[i].setDecimalSymbol(XMLHandler.getTagValue(fnode, getXmlCode("FIELD_DECIMAL")));
+        inputFields[i].setDecimalSymbol(XMLHandler.getTagValue(fnode,
+            getXmlCode("FIELD_DECIMAL")));
         inputFields[i].setGroupSymbol(XMLHandler.getTagValue(fnode, getXmlCode("FIELD_GROUP")));
         inputFields[i]
             .setLength(Const.toInt(XMLHandler.getTagValue(fnode, getXmlCode("FIELD_LENGTH")),
-1));
@@ -214,7 +218,7 @@ public class CsvInputMeta extends BaseStepMeta
     retval.append("    ")
         .append(XMLHandler.addTagValue("currentRestructNumber", currentRestructNumber));
     retval.append("    ").append(XMLHandler.addTagValue("blocksID", blocksID));
-
+    retval.append("    ").append(XMLHandler.addTagValue("escapeCharacter", escapeCharacter));
     retval.append("    ").append(XMLHandler.openTag(getXmlCode("FIELDS"))).append(Const.CR);
     for (int i = 0; i < inputFields.length; i++) {
       TextFileInputField field = inputFields[i];
@@ -266,6 +270,7 @@ public class CsvInputMeta extends BaseStepMeta
       encoding = rep.getStepAttributeString(idStep, getRepCode("ENCODING"));
       currentRestructNumber = (int) rep.getStepAttributeInteger(idStep, "currentRestructNumber");
       blocksID = rep.getStepAttributeString(idStep, getRepCode("blocksID"));
+      escapeCharacter = rep.getStepAttributeString(idStep, getRepCode("escapeCharacter"));
       int nrfields = rep.countNrStepAttributes(idStep, getRepCode("FIELD_NAME"));
 
       allocate(nrfields);
@@ -320,6 +325,8 @@ public class CsvInputMeta extends BaseStepMeta
       rep.saveStepAttribute(idTransformation, idStep, "currentRestructNumber",
           currentRestructNumber);
       rep.saveStepAttribute(idTransformation, idStep, getRepCode("blocksID"), blocksID);
+      rep.saveStepAttribute(idTransformation, idStep, getRepCode("escapeCharacter"),
+          escapeCharacter);
       for (int i = 0; i < inputFields.length; i++) {
         TextFileInputField field = inputFields[i];
 
@@ -605,7 +612,11 @@ public class CsvInputMeta extends BaseStepMeta
   }
 
   public String getEscapeCharacter() {
-    return null;
+    return escapeCharacter;
+  }
+
+  public void setEscapeCharacter(String escapeCharacter){
+    this.escapeCharacter = escapeCharacter;
   }
 
   public String getFileType() {
@@ -811,6 +822,8 @@ public class CsvInputMeta extends BaseStepMeta
           currentRestructNumber = (Integer) entry.getValue();
         } else if ("blocksID".equals(attributeKey)) {
           blocksID = (String) entry.getValue();
+        } else if ("escapeCharacter".equals(attributeKey)) {
+          escapeCharacter = (String) entry.getValue();
         } else {
           throw new RuntimeException(
               "Unhandled metadata injection of attribute: " + attr.toString() + " - " + attr

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
b/processing/src/main/java/org/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
index b48393d..44ce52b 100644
--- a/processing/src/main/java/org/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
+++ b/processing/src/main/java/org/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
@@ -87,6 +87,8 @@ public class DataProcessTaskStatus implements IDataProcessStatus, Serializable
{
 
   private String blocksID;
 
+  private String escapeCharacter;
+
   public DataProcessTaskStatus(String schemaName, String cubeName, String tableName) {
     this.schemaName = schemaName;
     this.cubeName = cubeName;
@@ -290,4 +292,12 @@ public class DataProcessTaskStatus implements IDataProcessStatus, Serializable
{
   public void setBlocksID(String blocksID) {
     this.blocksID = blocksID;
   }
+
+  public String getEscapeCharacter() {
+    return escapeCharacter;
+  }
+
+  public void setEscapeCharacter(String escapeCharacter) {
+    this.escapeCharacter = escapeCharacter;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/dataprocessor/IDataProcessStatus.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/dataprocessor/IDataProcessStatus.java
b/processing/src/main/java/org/carbondata/processing/dataprocessor/IDataProcessStatus.java
index 23b40e8..54c7463 100644
--- a/processing/src/main/java/org/carbondata/processing/dataprocessor/IDataProcessStatus.java
+++ b/processing/src/main/java/org/carbondata/processing/dataprocessor/IDataProcessStatus.java
@@ -197,4 +197,6 @@ public interface IDataProcessStatus {
   void setCsvDelimiter(String csvDelimiter);
 
   String getBlocksID();
+
+  String getEscapeCharacter();
 }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5045d739/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java
b/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java
index 00f233c..d074473 100644
--- a/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java
+++ b/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java
@@ -178,6 +178,7 @@ public class GraphGenerator {
   private String factStoreLocation;
   private int currentRestructNumber;
   private String blocksID;
+  private String escapeCharacter;
   /**
    * task id, each spark task has a unique id
    */
@@ -211,6 +212,7 @@ public class GraphGenerator {
     this.taskNo = dataLoadModel.getTaskNo();
     this.factTimeStamp = dataLoadModel.getFactTimeStamp();
     this.segmentId = segmentId;
+    this.escapeCharacter = dataLoadModel.getEscapeCharacter();
     initialise();
     LOGGER.info("************* Is Columnar Storage" + isColumnar);
   }
@@ -438,6 +440,7 @@ public class GraphGenerator {
         CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
     //set blocks info id
     csvInputMeta.setBlocksID(this.blocksID);
+    csvInputMeta.setEscapeCharacter(this.escapeCharacter);
     csvDataStep.setDraw(true);
     csvDataStep.setDescription("Read raw data from " + GraphGeneratorConstants.CSV_INPUT);
 


Mime
View raw message