asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amo...@apache.org
Subject [2/2] asterixdb git commit: ASTERIXDB-1470 Fix escapes in String values in ClassAd Parser
Date Fri, 10 Jun 2016 21:42:55 GMT
ASTERIXDB-1470 Fix escapes in String values in ClassAd Parser

Change-Id: Id6e4e25263c5a6f0efe26773da7c3b8fcf7e2427
Reviewed-on: https://asterix-gerrit.ics.uci.edu/915
Reviewed-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Michael Blow <michael.blow@couchbase.com>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/e639c704
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/e639c704
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/e639c704

Branch: refs/heads/master
Commit: e639c70441b262acd826b94b5145b0a00ab4c5c6
Parents: 18d9580
Author: Abdullah Alamoudi <bamousaa@gmail.com>
Authored: Fri Jun 10 00:39:52 2016 +0300
Committer: abdullah alamoudi <bamousaa@gmail.com>
Committed: Fri Jun 10 14:41:16 2016 -0700

----------------------------------------------------------------------
 .../apache/asterix/external/classad/Util.java   |    89 +-
 .../external/classad/test/ClassAdToADMTest.java |    89 +-
 .../asterix/external/library/ClassAdParser.java |    11 +-
 .../src/test/resources/escapes.txt              |     4 +
 .../src/test/resources/jobads.txt               | 34801 +++++++++++------
 5 files changed, 22445 insertions(+), 12549 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/e639c704/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/Util.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/Util.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/Util.java
index cbecb1b..789cf1d 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/Util.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/Util.java
@@ -18,32 +18,36 @@
  */
 package org.apache.asterix.external.classad;
 
+import java.nio.charset.StandardCharsets;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.Random;
 import java.util.TimeZone;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.asterix.om.base.AMutableInt32;
 
 public class Util {
     // convert escapes in-place
     // the string can only shrink while converting escapes so we can safely convert in-place.
-    // needs verification
+    private static final Pattern OCTAL = Pattern.compile("\\\\([0-3][0-7]{0,2})");
+
     public static boolean convertEscapes(AMutableCharArrayString text) {
         boolean validStr = true;
-        if (text.getLength() == 0)
+        if (text.getLength() == 0) {
             return true;
-        int length = text.getLength();
+        }
         int dest = 0;
-        for (int source = 0; source < length; ++source) {
+        boolean hasOctal = false;
+        for (int source = 0; source < text.getLength(); ++source) {
             char ch = text.charAt(source);
             // scan for escapes, a terminating slash cannot be an escape
-            if (ch == '\\' && source < length - 1) {
+            if (ch == '\\' && source < text.getLength() - 1) {
                 ++source; // skip the \ character
                 ch = text.charAt(source);
-
                 // The escape part should be re-validated
                 switch (ch) {
                     case 'b':
@@ -66,29 +70,8 @@ public class Util {
                         break;
                     default:
                         if (Lexer.isodigit(ch)) {
-                            int number = ch - '0';
-                            // There can be up to 3 octal digits in an octal escape
-                            //  \[0..3]nn or \nn or \n. We quit at 3 characters or
-                            // at the first non-octal character.
-                            if (source + 1 < length) {
-                                char digit = text.charAt(source + 1); // is the next digit
also
-                                if (Lexer.isodigit(digit)) {
-                                    ++source;
-                                    number = (number << 3) + digit - '0';
-                                    if (number < 0x20 && source + 1 < length)
{
-                                        digit = text.charAt(source + 1);
-                                        if (Lexer.isodigit(digit)) {
-                                            ++source;
-                                            number = (number << 3) + digit - '0';
-                                        }
-                                    }
-                                }
-                            }
-                            if (ch == 0) { // "\\0" is an invalid substring within a string
literal
-                                validStr = false;
-                            }
-                        } else {
-                            // pass char after \ unmodified.
+                            hasOctal = true;
+                            ++dest;
                         }
                         break;
                 }
@@ -99,40 +82,44 @@ public class Util {
                 // text[dest] = ch;
                 ++dest;
             } else {
-                text.erase(dest);
-                text.setChar(dest, ch);
-                ++dest;
-                --source;
+                try {
+                    text.erase(dest);
+                    text.setChar(dest, ch);
+                    ++dest;
+                    --source;
+                } catch (Throwable th) {
+                    th.printStackTrace();
+                }
             }
         }
 
-        if (dest < length) {
+        if (dest < text.getLength()) {
             text.erase(dest);
-            length = dest;
+            text.setLength(dest);
         }
         // silly, but to fulfull the original contract for this function
         // we need to remove the last character in the string if it is a '\0'
         // (earlier logic guaranteed that a '\0' can ONLY be the last character)
-        if (length > 0 && !(text.charAt(length - 1) == '\0')) {
-            //text.erase(length - 1);
+        if (text.getLength() > 0 && (text.charAt(text.getLength() - 1) == '\0'))
{
+            text.erase(text.getLength() - 1);
         }
+        if (hasOctal) {
+            Matcher m = OCTAL.matcher(text.toString());
+            StringBuffer out = new StringBuffer();
+            while (m.find()) {
+                int octet = Integer.parseInt(m.group(1), 8);
+                if (octet == 0 || octet > 255) {
+                    return false;
+                }
+                m.appendReplacement(out, String.valueOf((char) octet));
+            }
+            m.appendTail(out);
+            text.setValue(new String(out.toString().getBytes(StandardCharsets.ISO_8859_1),
StandardCharsets.UTF_8));
+        }
+
         return validStr;
     }
 
-    /***************************************************************
-     * Copyright (C) 1990-2007, Condor Team, Computer Sciences Department,
-     * University of Wisconsin-Madison, WI.
-     * Licensed under the Apache License, Version 2.0 (the "License"); you
-     * may not use this file except in compliance with the License. You may
-     * obtain a copy of the License at
-     * http://www.apache.org/licenses/LICENSE-2.0
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     ***************************************************************/
-
     public static Random initialized = new Random((new Date()).getTime());
 
     public static int getRandomInteger() {

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/e639c704/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/test/ClassAdToADMTest.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/test/ClassAdToADMTest.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/test/ClassAdToADMTest.java
index 493bd3b..a4a64c4 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/test/ClassAdToADMTest.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/classad/test/ClassAdToADMTest.java
@@ -138,6 +138,44 @@ public class ClassAdToADMTest extends TestCase {
     }
 
     /**
+    *
+    */
+    public void testEscaping() {
+        try {
+            ClassAdObjectPool objectPool = new ClassAdObjectPool();
+            ClassAd pAd = new ClassAd(objectPool);
+            String[] files = new String[] { "/escapes.txt" };
+            ClassAdParser parser = new ClassAdParser(objectPool);
+            CharArrayLexerSource lexerSource = new CharArrayLexerSource();
+            for (String path : files) {
+                List<Path> paths = new ArrayList<>();
+                paths.add(Paths.get(getClass().getResource(path).toURI()));
+                FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
+                LocalFSInputStream in = new LocalFSInputStream(watcher);
+                SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader(in,
"[", "]");
+                try {
+                    Value val = new Value(objectPool);
+                    while (recordReader.hasNext()) {
+                        val.reset();
+                        IRawRecord<char[]> record = recordReader.next();
+                        lexerSource.setNewSource(record.get());
+                        parser.setLexerSource(lexerSource);
+                        parser.parseNext(pAd);
+                        Assert.assertEquals(
+                                "[ Args = \"“-1 0.1 0.1 0.5 2e-07 0.001 10 -1”\"; GlobalJobId
= \"submit-4.chtc.wisc.edu#3724038.0#1462893042\" ]",
+                                pAd.toString());
+                    }
+                } finally {
+                    recordReader.close();
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            assertTrue(false);
+        }
+    }
+
+    /**
      *
      */
     public void testSchemaless() {
@@ -153,33 +191,36 @@ public class ClassAdToADMTest extends TestCase {
                 FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
                 LocalFSInputStream in = new LocalFSInputStream(watcher);
                 SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader(in,
"[", "]");
-                Value val = new Value(objectPool);
-                while (recordReader.hasNext()) {
-                    val.reset();
-                    IRawRecord<char[]> record = recordReader.next();
-                    lexerSource.setNewSource(record.get());
-                    parser.setLexerSource(lexerSource);
-                    parser.parseNext(pAd);
-                    Map<CaseInsensitiveString, ExprTree> attrs = pAd.getAttrList();
-                    for (Entry<CaseInsensitiveString, ExprTree> entry : attrs.entrySet())
{
-                        ExprTree tree = entry.getValue();
-                        switch (tree.getKind()) {
-                            case ATTRREF_NODE:
-                            case CLASSAD_NODE:
-                            case EXPR_ENVELOPE:
-                            case EXPR_LIST_NODE:
-                            case FN_CALL_NODE:
-                            case OP_NODE:
-                                break;
-                            case LITERAL_NODE:
-                                break;
-                            default:
-                                System.out.println("Something is wrong");
-                                break;
+                try {
+                    Value val = new Value(objectPool);
+                    while (recordReader.hasNext()) {
+                        val.reset();
+                        IRawRecord<char[]> record = recordReader.next();
+                        lexerSource.setNewSource(record.get());
+                        parser.setLexerSource(lexerSource);
+                        parser.parseNext(pAd);
+                        Map<CaseInsensitiveString, ExprTree> attrs = pAd.getAttrList();
+                        for (Entry<CaseInsensitiveString, ExprTree> entry : attrs.entrySet())
{
+                            ExprTree tree = entry.getValue();
+                            switch (tree.getKind()) {
+                                case ATTRREF_NODE:
+                                case CLASSAD_NODE:
+                                case EXPR_ENVELOPE:
+                                case EXPR_LIST_NODE:
+                                case FN_CALL_NODE:
+                                case OP_NODE:
+                                    break;
+                                case LITERAL_NODE:
+                                    break;
+                                default:
+                                    System.out.println("Something is wrong");
+                                    break;
+                            }
                         }
                     }
+                } finally {
+                    recordReader.close();
                 }
-                recordReader.close();
             }
         } catch (Exception e) {
             e.printStackTrace();

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/e639c704/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParser.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParser.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParser.java
index 48cbeb2..a5e422c 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParser.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParser.java
@@ -91,10 +91,12 @@ public class ClassAdParser extends AbstractDataParser implements IRecordDataPars
     private ARecordType recordType;
     private IObjectPool<IARecordBuilder, ATypeTag> recordBuilderPool = new ListObjectPool<IARecordBuilder,
ATypeTag>(
             new RecordBuilderFactory());
-    private IObjectPool<IAsterixListBuilder, ATypeTag> listBuilderPool = new ListObjectPool<IAsterixListBuilder,
ATypeTag>(
-            new ListBuilderFactory());
-    private IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool = new ListObjectPool<IMutableValueStorage,
ATypeTag>(
-            new AbvsBuilderFactory());
+    private IObjectPool<IAsterixListBuilder, ATypeTag> listBuilderPool =
+            new ListObjectPool<IAsterixListBuilder, ATypeTag>(
+                    new ListBuilderFactory());
+    private IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool =
+            new ListObjectPool<IMutableValueStorage, ATypeTag>(
+                    new AbvsBuilderFactory());
     private final ClassAd rootAd;
     private String exprPrefix = "expr=";
     private String exprSuffix = "";
@@ -1574,7 +1576,6 @@ public class ClassAdParser extends AbstractDataParser implements IRecordDataPars
             }
 
             isExpr = false;
-            // parse the expression
             parseExpression(tree);
             if (tree.getInnerTree() == null) {
                 throw new HyracksDataException("parse expression returned empty tree");

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/e639c704/asterixdb/asterix-external-data/src/test/resources/escapes.txt
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-external-data/src/test/resources/escapes.txt b/asterixdb/asterix-external-data/src/test/resources/escapes.txt
new file mode 100644
index 0000000..50361e2
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/test/resources/escapes.txt
@@ -0,0 +1,4 @@
+[
+        Args = "\342\200\234-1 0.1 0.1 0.5 2e-07 0.001 10 -1\342\200\235";
+        GlobalJobId = "submit-4.chtc.wisc.edu#3724038.0#1462893042";
+    ]


Mime
View raw message