metron-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From o...@apache.org
Subject metron git commit: METRON-933 STELLAR support for setting fields to regex match captures (ottobackwards) closes apache/metron#638
Date Tue, 11 Jul 2017 19:36:59 GMT
Repository: metron
Updated Branches:
  refs/heads/master dcec5a7cf -> cf165ff30


METRON-933 STELLAR support for setting fields to regex match captures (ottobackwards) closes apache/metron#638


Project: http://git-wip-us.apache.org/repos/asf/metron/repo
Commit: http://git-wip-us.apache.org/repos/asf/metron/commit/cf165ff3
Tree: http://git-wip-us.apache.org/repos/asf/metron/tree/cf165ff3
Diff: http://git-wip-us.apache.org/repos/asf/metron/diff/cf165ff3

Branch: refs/heads/master
Commit: cf165ff306764641a2c7991ff93cf48228d7910d
Parents: dcec5a7
Author: ottobackwards <ottobackwards@gmail.com>
Authored: Tue Jul 11 15:11:12 2017 -0400
Committer: otto <otto@apache.org>
Committed: Tue Jul 11 15:11:12 2017 -0400

----------------------------------------------------------------------
 metron-stellar/stellar-common/README.md         |   9 ++
 .../stellar/common/utils/PatternCache.java      |  37 +++++++
 .../stellar/dsl/functions/RegExFunctions.java   | 103 +++++++++++++++++++
 .../stellar/dsl/functions/StringFunctions.java  |  23 -----
 .../dsl/functions/RegExFunctionsTest.java       |  70 +++++++++++++
 5 files changed, 219 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/README.md
----------------------------------------------------------------------
diff --git a/metron-stellar/stellar-common/README.md b/metron-stellar/stellar-common/README.md
index 2d2ffdd..8a2699d 100644
--- a/metron-stellar/stellar-common/README.md
+++ b/metron-stellar/stellar-common/README.md
@@ -161,6 +161,7 @@ In the core language functions, we support basic functional programming primitiv
 | [ `PROTOCOL_TO_NAME`](#protocol_to_name) |
 | [ `REDUCE`](#reduce) |
 | [ `REGEXP_MATCH`](#regexp_match) |
+| [ `REGEXP_GROUP_VAL`](#regexp_group_val) |
 | [ `SPLIT`](#split) |
 | [ `STARTS_WITH`](#starts_with) |
 | [ `STATS_ADD`](../../metron-analytics/metron-statistics#stats_add) |
@@ -571,6 +572,14 @@ In the core language functions, we support basic functional programming primitiv
     * string - The string to test
     * pattern - The proposed regex pattern
   * Returns: True if the regex pattern matches the string and false if otherwise.
+  
+### `REGEXP_GROUP_VAL`
+  * Description: Returns the value of a group in a regex against a string
+  * Input:
+    * string - The string to test
+    * pattern - The proposed regex pattern
+    * group - The integer that selects what group to select, starting at 1
+  * Returns: The value of the group, or null if not matched or no group at index.
 
 ### `STRING_ENTROPY`
   * Description: Computes the base-2 shannon entropy of a string.

http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/PatternCache.java
----------------------------------------------------------------------
diff --git a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/PatternCache.java b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/PatternCache.java
new file mode 100644
index 0000000..f0fcee9
--- /dev/null
+++ b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/PatternCache.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.metron.stellar.common.utils;
+
+import java.util.HashMap;
+import java.util.regex.Pattern;
+
+public enum PatternCache {
+  INSTANCE;
+
+  private static final ThreadLocal<HashMap<String,Pattern>> _cache = ThreadLocal.withInitial(() ->
+          new HashMap<>());
+
+  public Pattern getPattern(String patternString){
+    Pattern pattern = _cache.get().get(patternString);
+    if(pattern == null){
+      pattern = Pattern.compile(patternString);
+      _cache.get().put(patternString,pattern);
+    }
+    return pattern;
+  }
+}

http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/RegExFunctions.java
----------------------------------------------------------------------
diff --git a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/RegExFunctions.java b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/RegExFunctions.java
new file mode 100644
index 0000000..a1ea229
--- /dev/null
+++ b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/RegExFunctions.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.metron.stellar.dsl.functions;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.metron.stellar.common.utils.ConversionUtils;
+import org.apache.metron.stellar.common.utils.PatternCache;
+import org.apache.metron.stellar.dsl.BaseStellarFunction;
+import org.apache.metron.stellar.dsl.Stellar;
+
+public class RegExFunctions {
+
+  @Stellar(name = "REGEXP_MATCH",
+      description = "Determines whether a regex matches a string",
+      params = {
+          "string - The string to test",
+          "pattern - The proposed regex pattern"
+      },
+      returns = "True if the regex pattern matches the string and false if otherwise.")
+  public static class RegexpMatch extends BaseStellarFunction {
+
+    @Override
+    public Object apply(List<Object> list) {
+      if (list.size() < 2) {
+        throw new IllegalStateException(
+            "REGEXP_MATCH expects two args: [string, pattern] where pattern is a regexp pattern");
+      }
+      String patternString = (String) list.get(1);
+      String str = (String) list.get(0);
+      if (str == null || patternString == null) {
+        return false;
+      }
+      return PatternCache.INSTANCE.getPattern(patternString).matcher(str).matches();
+    }
+  }
+
+  @Stellar(name = "REGEXP_GROUP_VAL",
+      description = "Returns the value of a group in a regex against a string",
+      params = {
+          "string - The string to test",
+          "pattern - The proposed regex pattern",
+          "group - integer that selects what group to select, starting at 1"
+      },
+      returns = "The value of the group, or null if not matched or no group at index")
+  public static class RegexpGroupValue extends BaseStellarFunction {
+
+    @Override
+    public Object apply(List<Object> list) {
+      if (list.size() != 3) {
+        throw new IllegalStateException(
+            "REGEXP_GROUP_VAL expects three args: [string, pattern, int]" + ""
+                + "where pattern is a regexp pattern");
+      }
+      String stringPattern = (String) list.get(1);
+      String str = (String) list.get(0);
+      Integer groupNumber = ConversionUtils.convert(list.get(2), Integer.class);
+
+      if (groupNumber == null) {
+        // group number was not a valid integer
+        return null;
+      }
+
+      if (groupNumber == 0) {
+        // 0, by default is the entire input
+        // default to returning a non-null
+        return str;
+      }
+
+      if (str == null || stringPattern == null) {
+        return null;
+      }
+      Pattern pattern = PatternCache.INSTANCE.getPattern(stringPattern);
+      Matcher matcher = pattern.matcher(str);
+      if (!matcher.matches()) {
+        return null;
+      }
+
+      int groupCount = matcher.groupCount();
+      if (groupCount == 0 || groupCount < groupNumber) {
+        return null;
+      }
+      return matcher.group(groupNumber);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java
----------------------------------------------------------------------
diff --git a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java
index 83099bd..289fa7f 100644
--- a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java
+++ b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java
@@ -34,29 +34,6 @@ import java.util.Map;
 
 public class StringFunctions {
 
-  @Stellar(name="REGEXP_MATCH"
-          ,description = "Determines whether a regex matches a string"
-          , params = {
-             "string - The string to test"
-            ,"pattern - The proposed regex pattern"
-            }
-          , returns = "True if the regex pattern matches the string and false if otherwise.")
-  public static class RegexpMatch extends BaseStellarFunction {
-
-    @Override
-    public Object apply(List<Object> list) {
-      if(list.size() < 2) {
-        throw new IllegalStateException("REGEXP_MATCH expects two args: [string, pattern] where pattern is a regexp pattern");
-      }
-      String pattern = (String) list.get(1);
-      String str = (String) list.get(0);
-      if(str == null || pattern == null) {
-        return false;
-      }
-      return str.matches(pattern);
-    }
-  }
-
   @Stellar(name="ENDS_WITH"
           ,description = "Determines whether a string ends with a specified suffix"
           , params = {

http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/RegExFunctionsTest.java
----------------------------------------------------------------------
diff --git a/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/RegExFunctionsTest.java b/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/RegExFunctionsTest.java
new file mode 100644
index 0000000..9728bd2
--- /dev/null
+++ b/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/RegExFunctionsTest.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.metron.stellar.dsl.functions;
+
+import org.apache.metron.stellar.dsl.ParseException;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.metron.stellar.common.utils.StellarProcessorUtils.runPredicate;
+
+public class RegExFunctionsTest {
+
+  // test RegExMatch
+  @Test
+  public void testRegExMatch() throws Exception {
+    final Map<String, String> variableMap = new HashMap<String, String>() {{
+      put("numbers", "12345");
+      put("numberPattern", "\\d(\\d)(\\d).*");
+      put("letters", "abcde");
+      put("empty", "");
+    }};
+
+    Assert.assertTrue(runPredicate("REGEXP_MATCH(numbers,numberPattern)", v -> variableMap.get(v)));
+    Assert.assertFalse(runPredicate("REGEXP_MATCH(letters,numberPattern)", v -> variableMap.get(v)));
+  }
+
+  @Test
+  public void testRegExGroupVal() throws Exception {
+    final Map<String, String> variableMap = new HashMap<String, String>() {{
+      put("numbers", "12345");
+      put("numberPattern", "\\d(\\d)(\\d).*");
+      put("numberPatternNoCaptures", "\\d\\d\\d.*");
+      put("letters", "abcde");
+      put("empty", "");
+    }};
+    Assert.assertTrue(runPredicate("REGEXP_GROUP_VAL(numbers,numberPattern,2) == '3'", v -> variableMap.get(v)));
+    Assert.assertTrue(runPredicate("REGEXP_GROUP_VAL(letters,numberPattern,2) == null", v -> variableMap.get(v)));
+    Assert.assertTrue(runPredicate("REGEXP_GROUP_VAL(empty,numberPattern,2) == null", v -> variableMap.get(v)));
+    Assert.assertTrue(runPredicate("REGEXP_GROUP_VAL(numbers,numberPatternNoCaptures,2) == null", v -> variableMap.get(v)));
+
+    boolean caught = false;
+    try{
+      runPredicate("REGEXP_GROUP_VAL(2) == null", v -> variableMap.get(v));
+    }catch(ParseException | IllegalStateException ise){
+      caught = true;
+    }
+    if(!caught){
+      Assert.assertTrue("Did not fail on wrong number of parameters",false);
+    }
+  }
+}


Mime
View raw message