marmotta-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sschaff...@apache.org
Subject git commit: regexp optimizations
Date Mon, 29 Apr 2013 17:06:15 GMT
Updated Branches:
  refs/heads/develop 430b45a16 -> 288b90439


regexp optimizations


Project: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/commit/288b9043
Tree: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/tree/288b9043
Diff: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/diff/288b9043

Branch: refs/heads/develop
Commit: 288b904395359759f5e429bfdad123a67344d56f
Parents: 430b45a
Author: Sebastian Schaffert <sschaffert@apache.org>
Authored: Mon Apr 29 19:06:03 2013 +0200
Committer: Sebastian Schaffert <sschaffert@apache.org>
Committed: Mon Apr 29 19:06:03 2013 +0200

----------------------------------------------------------------------
 .../sparql/persistence/KiWiSparqlConnection.java   |   63 ++++++++++++++-
 1 files changed, 62 insertions(+), 1 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/288b9043/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
index 1bd1227..e2f405c 100644
--- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
+++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java
@@ -44,6 +44,7 @@ import java.sql.SQLException;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.*;
+import java.util.regex.Pattern;
 
 /**
  * Provide improved SPARQL support by evaluating certain common compley SPARQL constructs
directly on the
@@ -354,7 +355,8 @@ public class KiWiSparqlConnection {
 
             // TODO: simplify the trivial cases to LIKE
 
-            return parent.getDialect().getRegexp(evaluateExpression(re.getArg(),queryVariables,
optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING));
+            //return parent.getDialect().getRegexp(evaluateExpression(re.getArg(),queryVariables,
optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING));
+            return optimizeRegexp(evaluateExpression(re.getArg(),queryVariables, optype),
evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING));
         } else if(expr instanceof LangMatches) {
             LangMatches lm = (LangMatches)expr;
             String value = evaluateExpression(lm.getLeftArg(), queryVariables, optype);
@@ -582,6 +584,65 @@ public class KiWiSparqlConnection {
         }
     }
 
+
+    /**
+     * Test if the regular expression given in the pattern can be simplified to a LIKE SQL
statement; these are
+     * considerably more efficient to evaluate in most databases, so in case we can simplify,
we return a LIKE.
+     *
+     * @param value
+     * @param pattern
+     * @return
+     */
+    private String optimizeRegexp(String value, String pattern) {
+        String simplified = pattern;
+
+        // apply simplifications
+
+        // remove SQL quotes at beginning and end
+        simplified = simplified.replaceFirst("^'","");
+        simplified = simplified.replaceFirst("'$","");
+
+
+        // remove .* at beginning and end, they are the default anyways
+        simplified = simplified.replaceFirst("^\\.\\*","");
+        simplified = simplified.replaceFirst("\\.\\*$","");
+
+        // replace all occurrences of % with \% and _ with \_, as they are special characters
in SQL
+        simplified = simplified.replaceAll("%","\\%");
+        simplified = simplified.replaceAll("_","\\_");
+
+        // if pattern now does not start with a ^, we put a "%" in front
+        if(!simplified.startsWith("^")) {
+            simplified = "%" + simplified;
+        } else {
+            simplified = simplified.substring(1);
+        }
+
+        // if pattern does not end with a "$", we put a "%" at the end
+        if(!simplified.endsWith("$")) {
+            simplified = simplified + "%";
+        } else {
+            simplified = simplified.substring(0,simplified.length()-2);
+        }
+
+        // replace all non-escaped occurrences of .* with %
+        simplified = simplified.replaceAll("(?<!\\\\)\\.\\*","%");
+
+        // replace all non-escaped occurrences of .+ with _%
+        simplified = simplified.replaceAll("(?<!\\\\)\\.\\+","_%");
+
+        // the pattern is not simplifiable if the simplification still contains unescaped
regular expression constructs
+        Pattern notSimplifiable = Pattern.compile("(?<!\\\\)[\\.\\*\\+\\{\\}\\[\\]\\|]");
+
+        if(notSimplifiable.matcher(simplified).find()) {
+            return parent.getDialect().getRegexp(value, pattern);
+        } else {
+            return value + " LIKE '"+simplified+"'";
+        }
+
+    }
+
+
     private static enum OPTypes {
         STRING, DOUBLE, INT, DATE, ANY
     }


Mime
View raw message