gobblin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hut...@apache.org
Subject [1/2] incubator-gobblin git commit: [GOBBLIN-271] Move the grok converter to the gobblin-grok module
Date Fri, 29 Sep 2017 17:43:43 GMT
Repository: incubator-gobblin
Updated Branches:
  refs/heads/master 70cbe91b9 -> adb810a7b


[GOBBLIN-271] Move the grok converter to the gobblin-grok module


Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/791306b8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/791306b8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/791306b8

Branch: refs/heads/master
Commit: 791306b866dc02f56ab28ffa099feea411efb028
Parents: 8284bb7
Author: Hung Tran <hutran@linkedin.com>
Authored: Fri Sep 29 10:07:23 2017 -0700
Committer: Hung Tran <hutran@linkedin.com>
Committed: Fri Sep 29 10:07:23 2017 -0700

----------------------------------------------------------------------
 gobblin-core/build.gradle                       |   1 -
 .../converter/grok/GrokToJsonConverter.java     | 219 -------------------
 .../src/main/resources/grok/grok-base-patterns  |  97 --------
 .../converter/grok/GrokToJsonConverterTest.java | 118 ----------
 .../converter/grok/convertedRecord.json         |   1 -
 .../grok/convertedS3AccessLogRecord.json        |   1 -
 .../converter/grok/s3AccessLogSchema.json       | 138 ------------
 .../grok/schemaWithNonNullableFields.json       |  66 ------
 .../grok/schemaWithNullableFields.json          |  66 ------
 .../src/test/resources/grok/grok-patterns       |  97 --------
 gobblin-modules/gobblin-grok/build.gradle       |  32 +++
 .../converter/grok/GrokToJsonConverter.java     | 219 +++++++++++++++++++
 .../src/main/resources/grok/grok-base-patterns  |  97 ++++++++
 .../converter/grok/GrokToJsonConverterTest.java | 118 ++++++++++
 .../converter/grok/convertedRecord.json         |   1 +
 .../grok/convertedS3AccessLogRecord.json        |   1 +
 .../converter/grok/s3AccessLogSchema.json       | 138 ++++++++++++
 .../grok/schemaWithNonNullableFields.json       |  66 ++++++
 .../grok/schemaWithNullableFields.json          |  66 ++++++
 .../src/test/resources/grok/grok-patterns       |  97 ++++++++
 20 files changed, 835 insertions(+), 804 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/build.gradle
----------------------------------------------------------------------
diff --git a/gobblin-core/build.gradle b/gobblin-core/build.gradle
index 9eeae56..ee7a77c 100644
--- a/gobblin-core/build.gradle
+++ b/gobblin-core/build.gradle
@@ -56,7 +56,6 @@ dependencies {
   compile externalDependency.findBugsAnnotations
   compile externalDependency.oltu
   compile externalDependency.opencsv
-  compile externalDependency.grok
   compile externalDependency.hadoopHdfs
 
   runtime externalDependency.protobuf

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/main/java/org/apache/gobblin/converter/grok/GrokToJsonConverter.java
----------------------------------------------------------------------
diff --git a/gobblin-core/src/main/java/org/apache/gobblin/converter/grok/GrokToJsonConverter.java b/gobblin-core/src/main/java/org/apache/gobblin/converter/grok/GrokToJsonConverter.java
deleted file mode 100644
index 1568eb7..0000000
--- a/gobblin-core/src/main/java/org/apache/gobblin/converter/grok/GrokToJsonConverter.java
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.gobblin.converter.grok;
-
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
-import java.util.List;
-import java.util.regex.Pattern;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.gson.JsonArray;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonNull;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
-
-import io.thekraken.grok.api.Grok;
-import io.thekraken.grok.api.Match;
-import io.thekraken.grok.api.exception.GrokException;
-
-import org.apache.gobblin.configuration.WorkUnitState;
-import org.apache.gobblin.converter.Converter;
-import org.apache.gobblin.converter.DataConversionException;
-import org.apache.gobblin.converter.SchemaConversionException;
-import org.apache.gobblin.converter.SingleRecordIterable;
-import org.apache.gobblin.util.DatasetFilterUtils;
-
-
-/**
- * GrokToJsonConverter accepts already deserialized text row, String, where you can use.
- *
- * Converts Text to JSON based on Grok pattern. Schema is represented by the form of JsonArray same interface being used by CsvToJonConverter.
- * Each text record is represented by a String.
- * The converter only supports Grok patterns where every group is named because it uses the group names as column names.
- *
- * The following config properties can be set:
- * The grok pattern to use for the conversion:
- * converter.grokToJsonConverter.grokPattern ="^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\" %{NUMBER:response} (?:-|%{NUMBER:bytes})"
- *
- * Path to the file which contains the base grok patterns which can be used in the converter's GROK pattern (if not set it will use the default ones):
- * converter.grokToJsonConverter.baseGrokPatternsFile=
- **
- * Specify a comma separated list of regexes which will be applied on the fields and matched one will be converted to json null:
- * converter.grokToJsonConverter.nullStringRegexes="[-\s]"
- *
- * Example of schema:
- * [
- {
- "columnName": "Day",
- "comment": "",
- "isNullable": "true",
- "dataType": {
- "type": "string"
- }
- },
- {
- "columnName": "Pageviews",
- "comment": "",
- "isNullable": "true",
- "dataType": {
- "type": "long"
- }
- }
- ]
- */
-public class GrokToJsonConverter extends Converter<String, JsonArray, String, JsonObject> {
-
-  private static final Logger LOG = LoggerFactory.getLogger(GrokToJsonConverter.class);
-  private static final JsonParser JSON_PARSER = new JsonParser();
-  private static final String COLUMN_NAME_KEY = "columnName";
-  private static final String DATA_TYPE = "dataType";
-  private static final String TYPE_KEY = "type";
-  private static final String NULLABLE = "isNullable";
-
-  public static final String GROK_PATTERN = "converter.grokToJsonConverter.grokPattern";
-  public static final String BASE_PATTERNS_FILE = "converter.grokToJsonConverter.baseGrokPatternsFile";
-  public static final String NULLSTRING_REGEXES = "converter.grokToJsonConverter.nullStringRegexes";
-
-  public static final String DEFAULT_GROK_PATTERNS_FILE = "/grok/grok-patterns";
-
-  private List<Pattern> nullStringRegexes;
-
-  private Grok grok;
-
-  @Override
-  public Converter<String, JsonArray, String, JsonObject> init(WorkUnitState workUnit) {
-    super.init(workUnit);
-    String pattern = workUnit.getProp(GROK_PATTERN);
-    String patternsFile = workUnit.getProp(BASE_PATTERNS_FILE);
-    this.nullStringRegexes = DatasetFilterUtils.getPatternsFromStrings(workUnit.getPropAsList(NULLSTRING_REGEXES, ""));
-
-    InputStreamReader grokPatterns;
-    try {
-      if (patternsFile == null) {
-        grokPatterns = new InputStreamReader(getClass().getResourceAsStream("/grok/grok-base-patterns"), "UTF8");
-      } else {
-        grokPatterns = new InputStreamReader(new FileInputStream(patternsFile), "UTF8");
-      }
-      grok = new Grok();
-      grok.addPatternFromReader(grokPatterns);
-      grok.compile(pattern);
-    } catch (GrokException | FileNotFoundException | UnsupportedEncodingException e) {
-      throw new RuntimeException("Error initializing GROK: " + e);
-    }
-
-    return this;
-  }
-
-  @Override
-  public JsonArray convertSchema(String inputSchema, WorkUnitState workUnit)
-      throws SchemaConversionException {
-    Preconditions.checkNotNull(inputSchema, "inputSchema is required.");
-    return JSON_PARSER.parse(inputSchema).getAsJsonArray();
-  }
-
-  /**
-   * Converts Text (String) to JSON based on a Grok regexp expression.
-   * By default, fields between Text and JSON are mapped by Grok SEMANTIC which is the identifier you give to the piece of text being matched in your Grok expression.
-   *
-   *
-   * e.g:
-   * {@inheritDoc}
-   * @see Converter#convertRecord(Object, Object, WorkUnitState)
-   */
-  @Override
-  public Iterable<JsonObject> convertRecord(JsonArray outputSchema, String inputRecord, WorkUnitState workUnit)
-      throws DataConversionException {
-
-    JsonObject outputRecord = createOutput(outputSchema, inputRecord);
-
-    LOG.debug("Converted into " + outputRecord);
-
-    return new SingleRecordIterable<JsonObject>(outputRecord);
-  }
-
-  @VisibleForTesting
-  JsonObject createOutput(JsonArray outputSchema, String inputRecord)
-      throws DataConversionException {
-    JsonObject outputRecord = new JsonObject();
-
-    Match gm = grok.match(inputRecord);
-    gm.captures();
-
-    JsonElement capturesJson = JSON_PARSER.parse(gm.toJson());
-
-    for (JsonElement anOutputSchema : outputSchema) {
-      JsonObject outputSchemaJsonObject = anOutputSchema.getAsJsonObject();
-      String key = outputSchemaJsonObject.get(COLUMN_NAME_KEY).getAsString();
-      String type = outputSchemaJsonObject.getAsJsonObject(DATA_TYPE).get(TYPE_KEY).getAsString();
-
-      if (isFieldNull(capturesJson, key)) {
-        if (!outputSchemaJsonObject.get(NULLABLE).getAsBoolean()) {
-          throw new DataConversionException(
-              "Field " + key + " is null or not exists but it is non-nullable by the schema.");
-        }
-        outputRecord.add(key, JsonNull.INSTANCE);
-      } else {
-        JsonElement jsonElement = capturesJson.getAsJsonObject().get(key);
-        switch (type) {
-          case "int":
-            outputRecord.addProperty(key, jsonElement.getAsInt());
-            break;
-          case "long":
-            outputRecord.addProperty(key, jsonElement.getAsLong());
-            break;
-          case "double":
-            outputRecord.addProperty(key, jsonElement.getAsDouble());
-            break;
-          case "float":
-            outputRecord.addProperty(key, jsonElement.getAsFloat());
-            break;
-          case "boolean":
-            outputRecord.addProperty(key, jsonElement.getAsBoolean());
-            break;
-          case "string":
-          default:
-            outputRecord.addProperty(key, jsonElement.getAsString());
-        }
-      }
-    }
-    return outputRecord;
-  }
-
-  private boolean isFieldNull(JsonElement capturesJson, String key) {
-    JsonObject jsonObject = capturesJson.getAsJsonObject();
-
-    if (!jsonObject.has(key)) {
-      return true;
-    }
-
-    for (Pattern pattern : this.nullStringRegexes) {
-      if (pattern.matcher(jsonObject.get(key).getAsString()).matches()) {
-        return true;
-      }
-    }
-
-    return false;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/main/resources/grok/grok-base-patterns
----------------------------------------------------------------------
diff --git a/gobblin-core/src/main/resources/grok/grok-base-patterns b/gobblin-core/src/main/resources/grok/grok-base-patterns
deleted file mode 100644
index 3793e02..0000000
--- a/gobblin-core/src/main/resources/grok/grok-base-patterns
+++ /dev/null
@@ -1,97 +0,0 @@
-#Forked from https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
-
-USERNAME [a-zA-Z0-9._-]+
-USER %{USERNAME}
-EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
-EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
-INT (?:[+-]?(?:[0-9]+))
-BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
-NUMBER (?:%{BASE10NUM})
-BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
-BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
-
-POSINT \b(?:[1-9][0-9]*)\b
-NONNEGINT \b(?:[0-9]+)\b
-WORD \b\w+\b
-NOTSPACE \S+
-SPACE \s*
-DATA .*?
-GREEDYDATA .*
-QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
-UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
-# URN, allowing use of RFC 2141 section 2.3 reserved characters
-URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
-
-# Networking
-MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
-CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
-WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
-COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
-IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
-IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
-IP (?:%{IPV6}|%{IPV4})
-HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
-IPORHOST (?:%{IP}|%{HOSTNAME})
-HOSTPORT %{IPORHOST}:%{POSINT}
-
-# paths
-PATH (?:%{UNIXPATH}|%{WINPATH})
-UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
-TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
-WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
-URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
-URIHOST %{IPORHOST}(?::%{POSINT:port})?
-# uripath comes loosely from RFC1738, but mostly from what Firefox
-# doesn't turn into %XX
-URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
-#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
-URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
-URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
-URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
-
-# Months: January, Feb, 3, 03, 12, December
-MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
-MONTHNUM (?:0?[1-9]|1[0-2])
-MONTHNUM2 (?:0[1-9]|1[0-2])
-MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
-
-# Days: Monday, Tue, Thu, etc...
-DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
-
-# Years?
-YEAR (?>\d\d){1,2}
-HOUR (?:2[0123]|[01]?[0-9])
-MINUTE (?:[0-5][0-9])
-# '60' is a leap second in most time standards and thus is valid.
-SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
-TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
-# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
-DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
-DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
-ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
-ISO8601_SECOND (?:%{SECOND}|60)
-TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
-DATE %{DATE_US}|%{DATE_EU}
-DATESTAMP %{DATE}[- ]%{TIME}
-TZ (?:[APMCE][SD]T|UTC)
-DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
-DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
-DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
-DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
-
-# Syslog Dates: Month Day HH:MM:SS
-SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
-PROG [\x21-\x5a\x5c\x5e-\x7e]+
-SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
-SYSLOGHOST %{IPORHOST}
-SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
-HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
-
-# Shortcuts
-QS %{QUOTEDSTRING}
-
-# Log formats
-SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
-
-# Log Levels
-LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/test/java/org/apache/gobblin/converter/grok/GrokToJsonConverterTest.java
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/java/org/apache/gobblin/converter/grok/GrokToJsonConverterTest.java b/gobblin-core/src/test/java/org/apache/gobblin/converter/grok/GrokToJsonConverterTest.java
deleted file mode 100644
index 3a4b78b..0000000
--- a/gobblin-core/src/test/java/org/apache/gobblin/converter/grok/GrokToJsonConverterTest.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.gobblin.converter.grok;
-
-import java.io.InputStreamReader;
-
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import com.google.gson.JsonArray;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
-
-import gobblin.configuration.WorkUnitState;
-
-import org.apache.gobblin.converter.DataConversionException;
-
-
-@Test(groups = {"gobblin.converter"})
-public class GrokToJsonConverterTest {
-  @Test
-  public void convertOutputWithNullableFields()
-      throws Exception {
-    JsonParser parser = new JsonParser();
-
-    String inputRecord =
-        "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";
-
-    JsonElement jsonElement = parser
-        .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/schemaWithNullableFields.json")));
-    JsonArray outputSchema = jsonElement.getAsJsonArray();
-
-    GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
-    WorkUnitState workUnitState = new WorkUnitState();
-    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
-        "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
-
-    grokToJsonConverter.init(workUnitState);
-    JsonObject actual = grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
-
-    JsonObject expected =
-        parser.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedRecord.json")))
-            .getAsJsonObject();
-    Assert.assertEquals(actual, expected);
-    grokToJsonConverter.close();
-  }
-
-  @Test(expectedExceptions = DataConversionException.class)
-  public void convertOutputWithNonNullableFieldsShouldThrowDataConversionException()
-      throws Exception {
-    JsonParser parser = new JsonParser();
-
-    String inputRecord =
-        "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";
-
-    JsonElement jsonElement = parser.parse(
-        new InputStreamReader(getClass().getResourceAsStream("/converter/grok/schemaWithNonNullableFields.json")));
-    JsonArray outputSchema = jsonElement.getAsJsonArray();
-
-    GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
-    WorkUnitState workUnitState = new WorkUnitState();
-    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
-        "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
-
-    grokToJsonConverter.init(workUnitState);
-    JsonObject actual = grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
-
-    JsonObject expected =
-        parser.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedRecord.json")))
-            .getAsJsonObject();
-    grokToJsonConverter.close();
-  }
-
-  @Test
-  public void convertWithNullStringSet()
-      throws Exception {
-    JsonParser parser = new JsonParser();
-
-    String inputRecord =
-        "79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be mybucket [06/Feb/2014:00:00:38 +0000] 192.0.2.3 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be 3E57427F3EXAMPLE REST.GET.VERSIONING - \"GET /mybucket?versioning HTTP/1.1\" 200 - 113 - 7 - \"-\" \"S3Console/0.4\" -";
-
-    JsonElement jsonElement =
-        parser.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/s3AccessLogSchema.json")));
-    JsonArray outputSchema = jsonElement.getAsJsonArray();
-
-    GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
-    WorkUnitState workUnitState = new WorkUnitState();
-    //Grok expression was taken from https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/aws
-    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
-        "%{WORD:owner} %{NOTSPACE:bucket} \\[%{HTTPDATE:timestamp}\\] %{IP:clientip} %{NOTSPACE:requester} %{NOTSPACE:request_id} %{NOTSPACE:operation} %{NOTSPACE:key} (?:\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\"|-) (?:%{INT:response:int}|-) (?:-|%{NOTSPACE:error_code}) (?:%{INT:bytes:int}|-) (?:%{INT:object_size:int}|-) (?:%{INT:request_time_ms:int}|-) (?:%{INT:turnaround_time_ms:int}|-) (?:%{QS:referrer}|-) (?:\"?%{QS:agent}\"?|-) (?:-|%{NOTSPACE:version_id})");
-    workUnitState.setProp(GrokToJsonConverter.NULLSTRING_REGEXES, "[\\s-]");
-
-    grokToJsonConverter.init(workUnitState);
-    JsonObject actual = grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
-
-    JsonObject expected = parser
-        .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedS3AccessLogRecord.json")))
-        .getAsJsonObject();
-    Assert.assertEquals(actual, expected);
-    grokToJsonConverter.close();
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/test/resources/converter/grok/convertedRecord.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/grok/convertedRecord.json b/gobblin-core/src/test/resources/converter/grok/convertedRecord.json
deleted file mode 100644
index 69f45f1..0000000
--- a/gobblin-core/src/test/resources/converter/grok/convertedRecord.json
+++ /dev/null
@@ -1 +0,0 @@
-{"clientip":"10.121.123.104","ident":null,"auth":null,"timestamp":"01/Nov/2012:21:01:17 +0100","request":"/cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F","httpversion":1.1,"response":302,"bytes":466}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/test/resources/converter/grok/convertedS3AccessLogRecord.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/grok/convertedS3AccessLogRecord.json b/gobblin-core/src/test/resources/converter/grok/convertedS3AccessLogRecord.json
deleted file mode 100644
index 29da948..0000000
--- a/gobblin-core/src/test/resources/converter/grok/convertedS3AccessLogRecord.json
+++ /dev/null
@@ -1 +0,0 @@
-{"owner":"79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be","bucket":"mybucket","timestamp":"06/Feb/2014:00:00:38 +0000","clientip":"192.0.2.3","requester":"79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be","request_id":"3E57427F3EXAMPLE","operation":"REST.GET.VERSIONING","key":null,"response":200,"error_code":null,"bytes":113,"object_size":null,"request_time_ms":7,"turnaround_time_ms":null,"referrer":null,"agent":"S3Console/0.4","version_id":null}

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/test/resources/converter/grok/s3AccessLogSchema.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/grok/s3AccessLogSchema.json b/gobblin-core/src/test/resources/converter/grok/s3AccessLogSchema.json
deleted file mode 100644
index 934399c..0000000
--- a/gobblin-core/src/test/resources/converter/grok/s3AccessLogSchema.json
+++ /dev/null
@@ -1,138 +0,0 @@
-[
-  {
-    "columnName": "owner",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "bucket",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "timestamp",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "clientip",
-    "comment": "",
-    "isNullable": "false",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "requester",
-    "comment": "",
-    "isNullable": "false",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "request_id",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "operation",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "key",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "response",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  },
-  {
-    "columnName": "error_code",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  },
-  {
-    "columnName": "bytes",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  },
-  {
-    "columnName": "object_size",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  },
-  {
-    "columnName": "request_time_ms",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  },
-  {
-    "columnName": "turnaround_time_ms",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  },
-  {
-    "columnName": "referrer",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "agent",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "version_id",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  }
-]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/test/resources/converter/grok/schemaWithNonNullableFields.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/grok/schemaWithNonNullableFields.json b/gobblin-core/src/test/resources/converter/grok/schemaWithNonNullableFields.json
deleted file mode 100644
index a1ce180..0000000
--- a/gobblin-core/src/test/resources/converter/grok/schemaWithNonNullableFields.json
+++ /dev/null
@@ -1,66 +0,0 @@
-[
-  {
-    "columnName": "clientip",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "ident",
-    "comment": "",
-    "isNullable": "false",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "auth",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "timestamp",
-    "comment": "",
-    "isNullable": "false",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "request",
-    "comment": "",
-    "isNullable": "false",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "httpversion",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "double"
-    }
-  },
-  {
-    "columnName": "response",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  },
-  {
-    "columnName": "bytes",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  }
-]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/test/resources/converter/grok/schemaWithNullableFields.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/grok/schemaWithNullableFields.json b/gobblin-core/src/test/resources/converter/grok/schemaWithNullableFields.json
deleted file mode 100644
index b8b0536..0000000
--- a/gobblin-core/src/test/resources/converter/grok/schemaWithNullableFields.json
+++ /dev/null
@@ -1,66 +0,0 @@
-[
-  {
-    "columnName": "clientip",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "ident",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "auth",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "timestamp",
-    "comment": "",
-    "isNullable": "false",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "request",
-    "comment": "",
-    "isNullable": "false",
-    "dataType": {
-      "type": "string"
-    }
-  },
-  {
-    "columnName": "httpversion",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "double"
-    }
-  },
-  {
-    "columnName": "response",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  },
-  {
-    "columnName": "bytes",
-    "comment": "",
-    "isNullable": "true",
-    "dataType": {
-      "type": "int"
-    }
-  }
-]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-core/src/test/resources/grok/grok-patterns
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/grok/grok-patterns b/gobblin-core/src/test/resources/grok/grok-patterns
deleted file mode 100644
index 3793e02..0000000
--- a/gobblin-core/src/test/resources/grok/grok-patterns
+++ /dev/null
@@ -1,97 +0,0 @@
-#Forked from https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
-
-USERNAME [a-zA-Z0-9._-]+
-USER %{USERNAME}
-EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
-EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
-INT (?:[+-]?(?:[0-9]+))
-BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
-NUMBER (?:%{BASE10NUM})
-BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
-BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
-
-POSINT \b(?:[1-9][0-9]*)\b
-NONNEGINT \b(?:[0-9]+)\b
-WORD \b\w+\b
-NOTSPACE \S+
-SPACE \s*
-DATA .*?
-GREEDYDATA .*
-QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
-UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
-# URN, allowing use of RFC 2141 section 2.3 reserved characters
-URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
-
-# Networking
-MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
-CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
-WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
-COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
-IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
-IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
-IP (?:%{IPV6}|%{IPV4})
-HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
-IPORHOST (?:%{IP}|%{HOSTNAME})
-HOSTPORT %{IPORHOST}:%{POSINT}
-
-# paths
-PATH (?:%{UNIXPATH}|%{WINPATH})
-UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
-TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
-WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
-URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
-URIHOST %{IPORHOST}(?::%{POSINT:port})?
-# uripath comes loosely from RFC1738, but mostly from what Firefox
-# doesn't turn into %XX
-URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
-#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
-URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
-URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
-URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
-
-# Months: January, Feb, 3, 03, 12, December
-MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
-MONTHNUM (?:0?[1-9]|1[0-2])
-MONTHNUM2 (?:0[1-9]|1[0-2])
-MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
-
-# Days: Monday, Tue, Thu, etc...
-DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
-
-# Years?
-YEAR (?>\d\d){1,2}
-HOUR (?:2[0123]|[01]?[0-9])
-MINUTE (?:[0-5][0-9])
-# '60' is a leap second in most time standards and thus is valid.
-SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
-TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
-# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
-DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
-DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
-ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
-ISO8601_SECOND (?:%{SECOND}|60)
-TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
-DATE %{DATE_US}|%{DATE_EU}
-DATESTAMP %{DATE}[- ]%{TIME}
-TZ (?:[APMCE][SD]T|UTC)
-DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
-DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
-DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
-DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
-
-# Syslog Dates: Month Day HH:MM:SS
-SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
-PROG [\x21-\x5a\x5c\x5e-\x7e]+
-SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
-SYSLOGHOST %{IPORHOST}
-SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
-HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
-
-# Shortcuts
-QS %{QUOTEDSTRING}
-
-# Log formats
-SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
-
-# Log Levels
-LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/build.gradle
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/build.gradle b/gobblin-modules/gobblin-grok/build.gradle
new file mode 100644
index 0000000..bbc4b9e
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/build.gradle
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Build this module with the standard Gradle Java plugin.
+apply plugin: 'java'
+
+dependencies {
+  // Sibling Gobblin projects this module compiles against.
+  compile project(":gobblin-api")
+  compile project(":gobblin-core-base")
+  compile project(":gobblin-data-management")
+
+  // Third-party libraries: the Grok engine and the SLF4J logging facade.
+  compile externalDependency.grok
+  compile externalDependency.slf4j
+
+  // Test-only dependency.
+  testCompile externalDependency.testng
+}
+
+// NOTE(review): presumably read by the root build to categorise modules - confirm.
+ext.classification="library"
+

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/main/java/org/apache/gobblin/converter/grok/GrokToJsonConverter.java
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/main/java/org/apache/gobblin/converter/grok/GrokToJsonConverter.java b/gobblin-modules/gobblin-grok/src/main/java/org/apache/gobblin/converter/grok/GrokToJsonConverter.java
new file mode 100644
index 0000000..1568eb7
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/main/java/org/apache/gobblin/converter/grok/GrokToJsonConverter.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gobblin.converter.grok;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonNull;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+
+import io.thekraken.grok.api.Grok;
+import io.thekraken.grok.api.Match;
+import io.thekraken.grok.api.exception.GrokException;
+
+import org.apache.gobblin.configuration.WorkUnitState;
+import org.apache.gobblin.converter.Converter;
+import org.apache.gobblin.converter.DataConversionException;
+import org.apache.gobblin.converter.SchemaConversionException;
+import org.apache.gobblin.converter.SingleRecordIterable;
+import org.apache.gobblin.util.DatasetFilterUtils;
+
+
+/**
+ * GrokToJsonConverter accepts an already deserialized text row, represented as a String.
+ *
+ * Converts text to JSON based on a Grok pattern. The schema is represented as a JsonArray, the same form used by CsvToJsonConverter.
+ * Each text record is represented by a String.
+ * The converter only supports Grok patterns where every group is named because it uses the group names as column names.
+ *
+ * The following config properties can be set:
+ * The grok pattern to use for the conversion:
+ * converter.grokToJsonConverter.grokPattern ="^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\" %{NUMBER:response} (?:-|%{NUMBER:bytes})"
+ *
+ * Path to the file containing the base grok patterns that can be used in the converter's GROK pattern (if not set, the default ones are used):
+ * converter.grokToJsonConverter.baseGrokPatternsFile=
+ *
+ * Comma-separated list of regexes applied to the fields; any field whose value matches one of them is converted to JSON null:
+ * converter.grokToJsonConverter.nullStringRegexes="[-\s]"
+ *
+ * Example of schema:
+ * [
+ {
+ "columnName": "Day",
+ "comment": "",
+ "isNullable": "true",
+ "dataType": {
+ "type": "string"
+ }
+ },
+ {
+ "columnName": "Pageviews",
+ "comment": "",
+ "isNullable": "true",
+ "dataType": {
+ "type": "long"
+ }
+ }
+ ]
+ */
+public class GrokToJsonConverter extends Converter<String, JsonArray, String, JsonObject> {
+
+  private static final Logger LOG = LoggerFactory.getLogger(GrokToJsonConverter.class);
+  // Shared parser used for the input schema and for the JSON text produced by Grok matches.
+  private static final JsonParser JSON_PARSER = new JsonParser();
+  // Keys of a column descriptor inside the output schema.
+  private static final String COLUMN_NAME_KEY = "columnName";
+  private static final String DATA_TYPE = "dataType";
+  private static final String TYPE_KEY = "type";
+  private static final String NULLABLE = "isNullable";
+
+  /** Config key: the Grok expression compiled in init() and applied to every record. */
+  public static final String GROK_PATTERN = "converter.grokToJsonConverter.grokPattern";
+  /** Config key: optional path to a file holding the base Grok pattern definitions. */
+  public static final String BASE_PATTERNS_FILE = "converter.grokToJsonConverter.baseGrokPatternsFile";
+  /** Config key: list of regexes; a field whose value fully matches one of them is treated as null. */
+  public static final String NULLSTRING_REGEXES = "converter.grokToJsonConverter.nullStringRegexes";
+
+  // NOTE(review): not referenced anywhere in this class; init() falls back to the classpath resource
+  // "/grok/grok-base-patterns" instead. Confirm whether this constant is stale.
+  public static final String DEFAULT_GROK_PATTERNS_FILE = "/grok/grok-patterns";
+
+  // Compiled null-string regexes, populated in init().
+  private List<Pattern> nullStringRegexes;
+
+  // Grok instance holding the compiled expression, populated in init().
+  private Grok grok;
+
+  @Override
+  public Converter<String, JsonArray, String, JsonObject> init(WorkUnitState workUnit) {
+    super.init(workUnit);
+    String pattern = workUnit.getProp(GROK_PATTERN);
+    String patternsFile = workUnit.getProp(BASE_PATTERNS_FILE);
+    this.nullStringRegexes = DatasetFilterUtils.getPatternsFromStrings(workUnit.getPropAsList(NULLSTRING_REGEXES, ""));
+
+    InputStreamReader grokPatterns;
+    try {
+      if (patternsFile == null) {
+        grokPatterns = new InputStreamReader(getClass().getResourceAsStream("/grok/grok-base-patterns"), "UTF8");
+      } else {
+        grokPatterns = new InputStreamReader(new FileInputStream(patternsFile), "UTF8");
+      }
+      grok = new Grok();
+      grok.addPatternFromReader(grokPatterns);
+      grok.compile(pattern);
+    } catch (GrokException | FileNotFoundException | UnsupportedEncodingException e) {
+      throw new RuntimeException("Error initializing GROK: " + e);
+    }
+
+    return this;
+  }
+
+  /**
+   * Parses the textual input schema into the JSON array used as the output schema.
+   *
+   * @param inputSchema schema as JSON text; must not be null
+   * @param workUnit not used by this method
+   * @return the parsed schema as a {@link JsonArray}
+   */
+  @Override
+  public JsonArray convertSchema(String inputSchema, WorkUnitState workUnit)
+      throws SchemaConversionException {
+    String schemaText = Preconditions.checkNotNull(inputSchema, "inputSchema is required.");
+    JsonElement parsedSchema = JSON_PARSER.parse(schemaText);
+    return parsedSchema.getAsJsonArray();
+  }
+
+  /**
+   * Converts a text record (String) to JSON based on the compiled Grok expression.
+   * Fields are mapped between text and JSON by the Grok SEMANTIC, i.e. the identifier given to the
+   * piece of text being matched in the Grok expression.
+   *
+   * {@inheritDoc}
+   * @see Converter#convertRecord(Object, Object, WorkUnitState)
+   */
+  @Override
+  public Iterable<JsonObject> convertRecord(JsonArray outputSchema, String inputRecord, WorkUnitState workUnit)
+      throws DataConversionException {
+
+    JsonObject outputRecord = createOutput(outputSchema, inputRecord);
+
+    // Parameterized logging: the record is only rendered to text when debug logging is enabled.
+    LOG.debug("Converted into {}", outputRecord);
+
+    return new SingleRecordIterable<JsonObject>(outputRecord);
+  }
+
+  /**
+   * Builds the output JSON record for one input line.
+   *
+   * Runs the compiled Grok expression over the input, then walks the output schema: every column is
+   * looked up by name in the Grok captures and copied into the result using the column's declared type.
+   * Columns that are absent from the captures, or whose value matches one of the configured null-string
+   * regexes, become JSON null when the schema marks them nullable.
+   *
+   * @param outputSchema array of column descriptors (column name, nullability and data type)
+   * @param inputRecord raw text line to match
+   * @return JSON object with one property per schema column
+   * @throws DataConversionException if a column resolves to null but is declared non-nullable
+   */
+  @VisibleForTesting
+  JsonObject createOutput(JsonArray outputSchema, String inputRecord)
+      throws DataConversionException {
+    Match match = grok.match(inputRecord);
+    match.captures();
+    JsonElement captures = JSON_PARSER.parse(match.toJson());
+
+    JsonObject result = new JsonObject();
+    for (JsonElement columnElement : outputSchema) {
+      JsonObject column = columnElement.getAsJsonObject();
+      String name = column.get(COLUMN_NAME_KEY).getAsString();
+      String type = column.getAsJsonObject(DATA_TYPE).get(TYPE_KEY).getAsString();
+
+      if (isFieldNull(captures, name)) {
+        boolean nullable = column.get(NULLABLE).getAsBoolean();
+        if (!nullable) {
+          throw new DataConversionException(
+              "Field " + name + " is null or not exists but it is non-nullable by the schema.");
+        }
+        result.add(name, JsonNull.INSTANCE);
+        continue;
+      }
+
+      JsonElement value = captures.getAsJsonObject().get(name);
+      if ("int".equals(type)) {
+        result.addProperty(name, value.getAsInt());
+      } else if ("long".equals(type)) {
+        result.addProperty(name, value.getAsLong());
+      } else if ("double".equals(type)) {
+        result.addProperty(name, value.getAsDouble());
+      } else if ("float".equals(type)) {
+        result.addProperty(name, value.getAsFloat());
+      } else if ("boolean".equals(type)) {
+        result.addProperty(name, value.getAsBoolean());
+      } else {
+        // "string" and any unrecognised type are emitted as strings.
+        result.addProperty(name, value.getAsString());
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Decides whether a schema column should be emitted as JSON null.
+   *
+   * @param capturesJson Grok captures as a JSON object
+   * @param key column name to look up
+   * @return true when the captures have no entry for {@code key}, or when the captured value fully
+   *         matches one of the configured null-string regexes
+   */
+  private boolean isFieldNull(JsonElement capturesJson, String key) {
+    JsonObject captures = capturesJson.getAsJsonObject();
+    boolean treatAsNull = !captures.has(key);
+
+    if (!treatAsNull) {
+      for (Pattern nullRegex : this.nullStringRegexes) {
+        if (nullRegex.matcher(captures.get(key).getAsString()).matches()) {
+          treatAsNull = true;
+          break;
+        }
+      }
+    }
+
+    return treatAsNull;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/main/resources/grok/grok-base-patterns
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/main/resources/grok/grok-base-patterns b/gobblin-modules/gobblin-grok/src/main/resources/grok/grok-base-patterns
new file mode 100644
index 0000000..3793e02
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/main/resources/grok/grok-base-patterns
@@ -0,0 +1,97 @@
+#Forked from https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
+
+USERNAME [a-zA-Z0-9._-]+
+USER %{USERNAME}
+EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
+EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
+INT (?:[+-]?(?:[0-9]+))
+BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
+NUMBER (?:%{BASE10NUM})
+BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
+BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
+
+POSINT \b(?:[1-9][0-9]*)\b
+NONNEGINT \b(?:[0-9]+)\b
+WORD \b\w+\b
+NOTSPACE \S+
+SPACE \s*
+DATA .*?
+GREEDYDATA .*
+QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
+UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
+# URN, allowing use of RFC 2141 section 2.3 reserved characters
+URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
+
+# Networking
+MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
+CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
+WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
+COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
+IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
+IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
+IP (?:%{IPV6}|%{IPV4})
+HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
+IPORHOST (?:%{IP}|%{HOSTNAME})
+HOSTPORT %{IPORHOST}:%{POSINT}
+
+# paths
+PATH (?:%{UNIXPATH}|%{WINPATH})
+UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
+TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
+WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
+URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
+URIHOST %{IPORHOST}(?::%{POSINT:port})?
+# uripath comes loosely from RFC1738, but mostly from what Firefox
+# doesn't turn into %XX
+URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
+#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
+URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
+URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
+URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
+
+# Months: January, Feb, 3, 03, 12, December
+MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
+MONTHNUM (?:0?[1-9]|1[0-2])
+MONTHNUM2 (?:0[1-9]|1[0-2])
+MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
+
+# Days: Monday, Tue, Thu, etc...
+DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
+
+# Years?
+YEAR (?>\d\d){1,2}
+HOUR (?:2[0123]|[01]?[0-9])
+MINUTE (?:[0-5][0-9])
+# '60' is a leap second in most time standards and thus is valid.
+SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
+TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
+# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
+DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
+DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
+ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
+ISO8601_SECOND (?:%{SECOND}|60)
+TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
+DATE %{DATE_US}|%{DATE_EU}
+DATESTAMP %{DATE}[- ]%{TIME}
+TZ (?:[APMCE][SD]T|UTC)
+DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
+DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
+DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
+DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
+
+# Syslog Dates: Month Day HH:MM:SS
+SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
+PROG [\x21-\x5a\x5c\x5e-\x7e]+
+SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
+SYSLOGHOST %{IPORHOST}
+SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
+HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
+
+# Shortcuts
+QS %{QUOTEDSTRING}
+
+# Log formats
+SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
+
+# Log Levels
+LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/test/java/org/apache/gobblin/converter/grok/GrokToJsonConverterTest.java
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/test/java/org/apache/gobblin/converter/grok/GrokToJsonConverterTest.java b/gobblin-modules/gobblin-grok/src/test/java/org/apache/gobblin/converter/grok/GrokToJsonConverterTest.java
new file mode 100644
index 0000000..3a4b78b
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/test/java/org/apache/gobblin/converter/grok/GrokToJsonConverterTest.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.converter.grok;
+
+import java.io.InputStreamReader;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+
+import gobblin.configuration.WorkUnitState;
+
+import org.apache.gobblin.converter.DataConversionException;
+
+
+@Test(groups = {"gobblin.converter"})
+public class GrokToJsonConverterTest {
+  /**
+   * Converts an access-log line whose ident and auth fields are "-" using a schema loaded from
+   * schemaWithNullableFields.json, and compares the result with convertedRecord.json.
+   */
+  @Test
+  public void convertOutputWithNullableFields()
+      throws Exception {
+    JsonParser parser = new JsonParser();
+
+    String inputRecord =
+        "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";
+
+    JsonElement jsonElement = parser
+        .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/schemaWithNullableFields.json")));
+    JsonArray outputSchema = jsonElement.getAsJsonArray();
+
+    GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
+    WorkUnitState workUnitState = new WorkUnitState();
+    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
+        "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
+
+    grokToJsonConverter.init(workUnitState);
+    try {
+      JsonObject actual = grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
+
+      JsonObject expected =
+          parser.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedRecord.json")))
+              .getAsJsonObject();
+      Assert.assertEquals(actual, expected);
+    } finally {
+      // Release the converter even when the conversion or the assertion fails.
+      grokToJsonConverter.close();
+    }
+  }
+
+  /**
+   * Converts an access-log line whose ident field is "-" using the schema loaded from
+   * schemaWithNonNullableFields.json; the conversion is expected to fail with a
+   * DataConversionException.
+   */
+  @Test(expectedExceptions = DataConversionException.class)
+  public void convertOutputWithNonNullableFieldsShouldThrowDataConversionException()
+      throws Exception {
+    JsonParser parser = new JsonParser();
+
+    String inputRecord =
+        "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";
+
+    JsonElement jsonElement = parser.parse(
+        new InputStreamReader(getClass().getResourceAsStream("/converter/grok/schemaWithNonNullableFields.json")));
+    JsonArray outputSchema = jsonElement.getAsJsonArray();
+
+    GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
+    WorkUnitState workUnitState = new WorkUnitState();
+    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
+        "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
+
+    grokToJsonConverter.init(workUnitState);
+    try {
+      // Expected to throw, so nothing after this call inside the try block would run.
+      grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
+    } finally {
+      // Release the converter even though the expected exception is propagating.
+      grokToJsonConverter.close();
+    }
+  }
+
+  /**
+   * Converts an S3 access-log line with the null-string regexes option set, so that captured "-"
+   * values are turned into JSON null, and compares the result with convertedS3AccessLogRecord.json.
+   */
+  @Test
+  public void convertWithNullStringSet()
+      throws Exception {
+    JsonParser parser = new JsonParser();
+
+    String inputRecord =
+        "79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be mybucket [06/Feb/2014:00:00:38 +0000] 192.0.2.3 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be 3E57427F3EXAMPLE REST.GET.VERSIONING - \"GET /mybucket?versioning HTTP/1.1\" 200 - 113 - 7 - \"-\" \"S3Console/0.4\" -";
+
+    JsonElement jsonElement =
+        parser.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/s3AccessLogSchema.json")));
+    JsonArray outputSchema = jsonElement.getAsJsonArray();
+
+    GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
+    WorkUnitState workUnitState = new WorkUnitState();
+    //Grok expression was taken from https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/aws
+    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
+        "%{WORD:owner} %{NOTSPACE:bucket} \\[%{HTTPDATE:timestamp}\\] %{IP:clientip} %{NOTSPACE:requester} %{NOTSPACE:request_id} %{NOTSPACE:operation} %{NOTSPACE:key} (?:\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\"|-) (?:%{INT:response:int}|-) (?:-|%{NOTSPACE:error_code}) (?:%{INT:bytes:int}|-) (?:%{INT:object_size:int}|-) (?:%{INT:request_time_ms:int}|-) (?:%{INT:turnaround_time_ms:int}|-) (?:%{QS:referrer}|-) (?:\"?%{QS:agent}\"?|-) (?:-|%{NOTSPACE:version_id})");
+    workUnitState.setProp(GrokToJsonConverter.NULLSTRING_REGEXES, "[\\s-]");
+
+    grokToJsonConverter.init(workUnitState);
+    try {
+      JsonObject actual = grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
+
+      JsonObject expected = parser
+          .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedS3AccessLogRecord.json")))
+          .getAsJsonObject();
+      Assert.assertEquals(actual, expected);
+    } finally {
+      // Release the converter even when the conversion or the assertion fails.
+      grokToJsonConverter.close();
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/convertedRecord.json
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/convertedRecord.json b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/convertedRecord.json
new file mode 100644
index 0000000..69f45f1
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/convertedRecord.json
@@ -0,0 +1 @@
+{"clientip":"10.121.123.104","ident":null,"auth":null,"timestamp":"01/Nov/2012:21:01:17 +0100","request":"/cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F","httpversion":1.1,"response":302,"bytes":466}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/convertedS3AccessLogRecord.json
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/convertedS3AccessLogRecord.json b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/convertedS3AccessLogRecord.json
new file mode 100644
index 0000000..29da948
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/convertedS3AccessLogRecord.json
@@ -0,0 +1 @@
+{"owner":"79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be","bucket":"mybucket","timestamp":"06/Feb/2014:00:00:38 +0000","clientip":"192.0.2.3","requester":"79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be","request_id":"3E57427F3EXAMPLE","operation":"REST.GET.VERSIONING","key":null,"response":200,"error_code":null,"bytes":113,"object_size":null,"request_time_ms":7,"turnaround_time_ms":null,"referrer":null,"agent":"S3Console/0.4","version_id":null}

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/s3AccessLogSchema.json
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/s3AccessLogSchema.json b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/s3AccessLogSchema.json
new file mode 100644
index 0000000..934399c
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/s3AccessLogSchema.json
@@ -0,0 +1,138 @@
+[
+  {
+    "columnName": "owner",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "bucket",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "timestamp",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "clientip",
+    "comment": "",
+    "isNullable": "false",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "requester",
+    "comment": "",
+    "isNullable": "false",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "request_id",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "operation",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "key",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "response",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  },
+  {
+    "columnName": "error_code",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  },
+  {
+    "columnName": "bytes",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  },
+  {
+    "columnName": "object_size",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  },
+  {
+    "columnName": "request_time_ms",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  },
+  {
+    "columnName": "turnaround_time_ms",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  },
+  {
+    "columnName": "referrer",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "agent",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "version_id",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  }
+]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/schemaWithNonNullableFields.json
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/schemaWithNonNullableFields.json b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/schemaWithNonNullableFields.json
new file mode 100644
index 0000000..a1ce180
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/schemaWithNonNullableFields.json
@@ -0,0 +1,66 @@
+[
+  {
+    "columnName": "clientip",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "ident",
+    "comment": "",
+    "isNullable": "false",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "auth",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "timestamp",
+    "comment": "",
+    "isNullable": "false",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "request",
+    "comment": "",
+    "isNullable": "false",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "httpversion",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "double"
+    }
+  },
+  {
+    "columnName": "response",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  },
+  {
+    "columnName": "bytes",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  }
+]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/schemaWithNullableFields.json
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/schemaWithNullableFields.json b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/schemaWithNullableFields.json
new file mode 100644
index 0000000..b8b0536
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/test/resources/converter/grok/schemaWithNullableFields.json
@@ -0,0 +1,66 @@
+[
+  {
+    "columnName": "clientip",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "ident",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "auth",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "timestamp",
+    "comment": "",
+    "isNullable": "false",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "request",
+    "comment": "",
+    "isNullable": "false",
+    "dataType": {
+      "type": "string"
+    }
+  },
+  {
+    "columnName": "httpversion",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "double"
+    }
+  },
+  {
+    "columnName": "response",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  },
+  {
+    "columnName": "bytes",
+    "comment": "",
+    "isNullable": "true",
+    "dataType": {
+      "type": "int"
+    }
+  }
+]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/791306b8/gobblin-modules/gobblin-grok/src/test/resources/grok/grok-patterns
----------------------------------------------------------------------
diff --git a/gobblin-modules/gobblin-grok/src/test/resources/grok/grok-patterns b/gobblin-modules/gobblin-grok/src/test/resources/grok/grok-patterns
new file mode 100644
index 0000000..3793e02
--- /dev/null
+++ b/gobblin-modules/gobblin-grok/src/test/resources/grok/grok-patterns
@@ -0,0 +1,97 @@
+#Forked from https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns
+
+USERNAME [a-zA-Z0-9._-]+
+USER %{USERNAME}
+EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
+EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
+INT (?:[+-]?(?:[0-9]+))
+BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
+NUMBER (?:%{BASE10NUM})
+BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
+BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
+
+POSINT \b(?:[1-9][0-9]*)\b
+NONNEGINT \b(?:[0-9]+)\b
+WORD \b\w+\b
+NOTSPACE \S+
+SPACE \s*
+DATA .*?
+GREEDYDATA .*
+QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
+UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
+# URN, allowing use of RFC 2141 section 2.3 reserved characters
+URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
+
+# Networking
+MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
+CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
+WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
+COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
+IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
+IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
+IP (?:%{IPV6}|%{IPV4})
+HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
+IPORHOST (?:%{IP}|%{HOSTNAME})
+HOSTPORT %{IPORHOST}:%{POSINT}
+
+# paths
+PATH (?:%{UNIXPATH}|%{WINPATH})
+UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
+TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
+WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
+URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
+URIHOST %{IPORHOST}(?::%{POSINT:port})?
+# uripath comes loosely from RFC1738, but mostly from what Firefox
+# doesn't turn into %XX
+URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
+#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
+URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
+URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
+URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
+
+# Months: January, Feb, 3, 03, 12, December
+MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
+MONTHNUM (?:0?[1-9]|1[0-2])
+MONTHNUM2 (?:0[1-9]|1[0-2])
+MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
+
+# Days: Monday, Tue, Thu, etc...
+DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
+
+# Years?
+YEAR (?>\d\d){1,2}
+HOUR (?:2[0123]|[01]?[0-9])
+MINUTE (?:[0-5][0-9])
+# '60' is a leap second in most time standards and thus is valid.
+SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
+TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
+# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
+DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
+DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
+ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
+ISO8601_SECOND (?:%{SECOND}|60)
+TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
+DATE %{DATE_US}|%{DATE_EU}
+DATESTAMP %{DATE}[- ]%{TIME}
+TZ (?:[APMCE][SD]T|UTC)
+DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
+DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
+DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
+DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
+
+# Syslog Dates: Month Day HH:MM:SS
+SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
+PROG [\x21-\x5a\x5c\x5e-\x7e]+
+SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
+SYSLOGHOST %{IPORHOST}
+SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
+HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
+
+# Shortcuts
+QS %{QUOTEDSTRING}
+
+# Log formats
+SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
+
+# Log Levels
+LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)
\ No newline at end of file


Mime
View raw message