hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject [23/28] incubator-hawq git commit: HAWQ-837. Add python modules into HAWQ code
Date Tue, 21 Jun 2016 02:41:39 GMT
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/80e25b46/tools/bin/ext/yaml/loader.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/loader.py b/tools/bin/ext/yaml/loader.py
new file mode 100644
index 0000000..29461f4
--- /dev/null
+++ b/tools/bin/ext/yaml/loader.py
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+__all__ = ['BaseLoader', 'SafeLoader', 'Loader']
+
+from reader import *
+from scanner import *
+from parser import *
+from composer import *
+from constructor import *
+from resolver import *
+
+class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):
+
+    def __init__(self, stream):
+        Reader.__init__(self, stream)
+        Scanner.__init__(self)
+        Parser.__init__(self)
+        Composer.__init__(self)
+        BaseConstructor.__init__(self)
+        BaseResolver.__init__(self)
+
+class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
+
+    def __init__(self, stream):
+        Reader.__init__(self, stream)
+        Scanner.__init__(self)
+        Parser.__init__(self)
+        Composer.__init__(self)
+        SafeConstructor.__init__(self)
+        Resolver.__init__(self)
+
+class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
+
+    def __init__(self, stream):
+        Reader.__init__(self, stream)
+        Scanner.__init__(self)
+        Parser.__init__(self)
+        Composer.__init__(self)
+        Constructor.__init__(self)
+        Resolver.__init__(self)
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/80e25b46/tools/bin/ext/yaml/nodes.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/nodes.py b/tools/bin/ext/yaml/nodes.py
new file mode 100644
index 0000000..3fdfbe6
--- /dev/null
+++ b/tools/bin/ext/yaml/nodes.py
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class Node(object):
+    def __init__(self, tag, value, start_mark, end_mark):
+        self.tag = tag
+        self.value = value
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+    def __repr__(self):
+        value = self.value
+        #if isinstance(value, list):
+        #    if len(value) == 0:
+        #        value = '<empty>'
+        #    elif len(value) == 1:
+        #        value = '<1 item>'
+        #    else:
+        #        value = '<%d items>' % len(value)
+        #else:
+        #    if len(value) > 75:
+        #        value = repr(value[:70]+u' ... ')
+        #    else:
+        #        value = repr(value)
+        value = repr(value)
+        return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)
+
+class ScalarNode(Node):
+    id = 'scalar'
+    def __init__(self, tag, value,
+            start_mark=None, end_mark=None, style=None):
+        self.tag = tag
+        self.value = value
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+        self.style = style
+
+class CollectionNode(Node):
+    def __init__(self, tag, value,
+            start_mark=None, end_mark=None, flow_style=None):
+        self.tag = tag
+        self.value = value
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+        self.flow_style = flow_style
+
+class SequenceNode(CollectionNode):
+    id = 'sequence'
+
+class MappingNode(CollectionNode):
+    id = 'mapping'
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/80e25b46/tools/bin/ext/yaml/parser.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/parser.py b/tools/bin/ext/yaml/parser.py
new file mode 100644
index 0000000..a1228f6
--- /dev/null
+++ b/tools/bin/ext/yaml/parser.py
@@ -0,0 +1,602 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# The following YAML grammar is LL(1) and is parsed by a recursive descent
+# parser.
+#
+# stream            ::= STREAM-START implicit_document? explicit_document* STREAM-END
+# implicit_document ::= block_node DOCUMENT-END*
+# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
+# block_node_or_indentless_sequence ::=
+#                       ALIAS
+#                       | properties (block_content | indentless_block_sequence)?
+#                       | block_content
+#                       | indentless_block_sequence
+# block_node        ::= ALIAS
+#                       | properties block_content?
+#                       | block_content
+# flow_node         ::= ALIAS
+#                       | properties flow_content?
+#                       | flow_content
+# properties        ::= TAG ANCHOR? | ANCHOR TAG?
+# block_content     ::= block_collection | flow_collection | SCALAR
+# flow_content      ::= flow_collection | SCALAR
+# block_collection  ::= block_sequence | block_mapping
+# flow_collection   ::= flow_sequence | flow_mapping
+# block_sequence    ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+# indentless_sequence   ::= (BLOCK-ENTRY block_node?)+
+# block_mapping     ::= BLOCK-MAPPING_START
+#                       ((KEY block_node_or_indentless_sequence?)?
+#                       (VALUE block_node_or_indentless_sequence?)?)*
+#                       BLOCK-END
+# flow_sequence     ::= FLOW-SEQUENCE-START
+#                       (flow_sequence_entry FLOW-ENTRY)*
+#                       flow_sequence_entry?
+#                       FLOW-SEQUENCE-END
+# flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+# flow_mapping      ::= FLOW-MAPPING-START
+#                       (flow_mapping_entry FLOW-ENTRY)*
+#                       flow_mapping_entry?
+#                       FLOW-MAPPING-END
+# flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+#
+# FIRST sets:
+#
+# stream: { STREAM-START }
+# explicit_document: { DIRECTIVE DOCUMENT-START }
+# implicit_document: FIRST(block_node)
+# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
+# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
+# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
+# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# block_sequence: { BLOCK-SEQUENCE-START }
+# block_mapping: { BLOCK-MAPPING-START }
+# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
+# indentless_sequence: { ENTRY }
+# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# flow_sequence: { FLOW-SEQUENCE-START }
+# flow_mapping: { FLOW-MAPPING-START }
+# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
+# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
+
+__all__ = ['Parser', 'ParserError']
+
+from error import MarkedYAMLError
+from tokens import *
+from events import *
+from scanner import *
+
+class ParserError(MarkedYAMLError):
+    pass
+
+class Parser(object):
+    # Since writing a recursive-descendant parser is a straightforward task, we
+    # do not give many comments here.
+    # Note that we use Python generators. If you rewrite the parser in another
+    # language, you may replace all 'yield'-s with event handler calls.
+
+    DEFAULT_TAGS = {
+        u'!':   u'!',
+        u'!!':  u'tag:yaml.org,2002:',
+    }
+
+    def __init__(self):
+        self.current_event = None
+        self.yaml_version = None
+        self.tag_handles = {}
+        self.states = []
+        self.marks = []
+        self.state = self.parse_stream_start
+
+    def check_event(self, *choices):
+        # Check the type of the next event.
+        if self.current_event is None:
+            if self.state:
+                self.current_event = self.state()
+        if self.current_event is not None:
+            if not choices:
+                return True
+            for choice in choices:
+                if isinstance(self.current_event, choice):
+                    return True
+        return False
+
+    def peek_event(self):
+        # Get the next event.
+        if self.current_event is None:
+            if self.state:
+                self.current_event = self.state()
+        return self.current_event
+
+    def get_event(self):
+        # Get the next event and proceed further.
+        if self.current_event is None:
+            if self.state:
+                self.current_event = self.state()
+        value = self.current_event
+        self.current_event = None
+        return value
+
+    # stream    ::= STREAM-START implicit_document? explicit_document* STREAM-END
+    # implicit_document ::= block_node DOCUMENT-END*
+    # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
+
+    def parse_stream_start(self):
+
+        # Parse the stream start.
+        token = self.get_token()
+        event = StreamStartEvent(token.start_mark, token.end_mark,
+                encoding=token.encoding)
+
+        # Prepare the next state.
+        self.state = self.parse_implicit_document_start
+
+        return event
+
+    def parse_implicit_document_start(self):
+
+        # Parse an implicit document.
+        if not self.check_token(DirectiveToken, DocumentStartToken,
+                StreamEndToken):
+            self.tag_handles = self.DEFAULT_TAGS
+            token = self.peek_token()
+            start_mark = end_mark = token.start_mark
+            event = DocumentStartEvent(start_mark, end_mark,
+                    explicit=False)
+
+            # Prepare the next state.
+            self.states.append(self.parse_document_end)
+            self.state = self.parse_block_node
+
+            return event
+
+        else:
+            return self.parse_document_start()
+
+    def parse_document_start(self):
+
+        # Parse any extra document end indicators.
+        while self.check_token(DocumentEndToken):
+            self.get_token()
+
+        # Parse an explicit document.
+        if not self.check_token(StreamEndToken):
+            token = self.peek_token()
+            start_mark = token.start_mark
+            version, tags = self.process_directives()
+            if not self.check_token(DocumentStartToken):
+                raise ParserError(None, None,
+                        "expected '<document start>', but found %r"
+                        % self.peek_token().id,
+                        self.peek_token().start_mark)
+            token = self.get_token()
+            end_mark = token.end_mark
+            event = DocumentStartEvent(start_mark, end_mark,
+                    explicit=True, version=version, tags=tags)
+            self.states.append(self.parse_document_end)
+            self.state = self.parse_document_content
+        else:
+            # Parse the end of the stream.
+            token = self.get_token()
+            event = StreamEndEvent(token.start_mark, token.end_mark)
+            assert not self.states
+            assert not self.marks
+            self.state = None
+        return event
+
+    def parse_document_end(self):
+
+        # Parse the document end.
+        token = self.peek_token()
+        start_mark = end_mark = token.start_mark
+        explicit = False
+        if self.check_token(DocumentEndToken):
+            token = self.get_token()
+            end_mark = token.end_mark
+            explicit = True
+        event = DocumentEndEvent(start_mark, end_mark,
+                explicit=explicit)
+
+        # Prepare the next state.
+        self.state = self.parse_document_start
+
+        return event
+
+    def parse_document_content(self):
+        if self.check_token(DirectiveToken,
+                DocumentStartToken, DocumentEndToken, StreamEndToken):
+            event = self.process_empty_scalar(self.peek_token().start_mark)
+            self.state = self.states.pop()
+            return event
+        else:
+            return self.parse_block_node()
+
+    def process_directives(self):
+        self.yaml_version = None
+        self.tag_handles = {}
+        while self.check_token(DirectiveToken):
+            token = self.get_token()
+            if token.name == u'YAML':
+                if self.yaml_version is not None:
+                    raise ParserError(None, None,
+                            "found duplicate YAML directive", token.start_mark)
+                major, minor = token.value
+                if major != 1:
+                    raise ParserError(None, None,
+                            "found incompatible YAML document (version 1.* is required)",
+                            token.start_mark)
+                self.yaml_version = token.value
+            elif token.name == u'TAG':
+                handle, prefix = token.value
+                if handle in self.tag_handles:
+                    raise ParserError(None, None,
+                            "duplicate tag handle %r" % handle.encode('utf-8'),
+                            token.start_mark)
+                self.tag_handles[handle] = prefix
+        if self.tag_handles:
+            value = self.yaml_version, self.tag_handles.copy()
+        else:
+            value = self.yaml_version, None
+        for key in self.DEFAULT_TAGS:
+            if key not in self.tag_handles:
+                self.tag_handles[key] = self.DEFAULT_TAGS[key]
+        return value
+
+    # block_node_or_indentless_sequence ::= ALIAS
+    #               | properties (block_content | indentless_block_sequence)?
+    #               | block_content
+    #               | indentless_block_sequence
+    # block_node    ::= ALIAS
+    #                   | properties block_content?
+    #                   | block_content
+    # flow_node     ::= ALIAS
+    #                   | properties flow_content?
+    #                   | flow_content
+    # properties    ::= TAG ANCHOR? | ANCHOR TAG?
+    # block_content     ::= block_collection | flow_collection | SCALAR
+    # flow_content      ::= flow_collection | SCALAR
+    # block_collection  ::= block_sequence | block_mapping
+    # flow_collection   ::= flow_sequence | flow_mapping
+
+    def parse_block_node(self):
+        return self.parse_node(block=True)
+
+    def parse_flow_node(self):
+        return self.parse_node()
+
+    def parse_block_node_or_indentless_sequence(self):
+        return self.parse_node(block=True, indentless_sequence=True)
+
+    def parse_node(self, block=False, indentless_sequence=False):
+        if self.check_token(AliasToken):
+            token = self.get_token()
+            event = AliasEvent(token.value, token.start_mark, token.end_mark)
+            self.state = self.states.pop()
+        else:
+            anchor = None
+            tag = None
+            start_mark = end_mark = tag_mark = None
+            if self.check_token(AnchorToken):
+                token = self.get_token()
+                start_mark = token.start_mark
+                end_mark = token.end_mark
+                anchor = token.value
+                if self.check_token(TagToken):
+                    token = self.get_token()
+                    tag_mark = token.start_mark
+                    end_mark = token.end_mark
+                    tag = token.value
+            elif self.check_token(TagToken):
+                token = self.get_token()
+                start_mark = tag_mark = token.start_mark
+                end_mark = token.end_mark
+                tag = token.value
+                if self.check_token(AnchorToken):
+                    token = self.get_token()
+                    end_mark = token.end_mark
+                    anchor = token.value
+            if tag is not None:
+                handle, suffix = tag
+                if handle is not None:
+                    if handle not in self.tag_handles:
+                        raise ParserError("while parsing a node", start_mark,
+                                "found undefined tag handle %r" % handle.encode('utf-8'),
+                                tag_mark)
+                    tag = self.tag_handles[handle]+suffix
+                else:
+                    tag = suffix
+            #if tag == u'!':
+            #    raise ParserError("while parsing a node", start_mark,
+            #            "found non-specific tag '!'", tag_mark,
+            #            "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
+            if start_mark is None:
+                start_mark = end_mark = self.peek_token().start_mark
+            event = None
+            implicit = (tag is None or tag == u'!')
+            if indentless_sequence and self.check_token(BlockEntryToken):
+                end_mark = self.peek_token().end_mark
+                event = SequenceStartEvent(anchor, tag, implicit,
+                        start_mark, end_mark)
+                self.state = self.parse_indentless_sequence_entry
+            else:
+                if self.check_token(ScalarToken):
+                    token = self.get_token()
+                    end_mark = token.end_mark
+                    if (token.plain and tag is None) or tag == u'!':
+                        implicit = (True, False)
+                    elif tag is None:
+                        implicit = (False, True)
+                    else:
+                        implicit = (False, False)
+                    event = ScalarEvent(anchor, tag, implicit, token.value,
+                            start_mark, end_mark, style=token.style)
+                    self.state = self.states.pop()
+                elif self.check_token(FlowSequenceStartToken):
+                    end_mark = self.peek_token().end_mark
+                    event = SequenceStartEvent(anchor, tag, implicit,
+                            start_mark, end_mark, flow_style=True)
+                    self.state = self.parse_flow_sequence_first_entry
+                elif self.check_token(FlowMappingStartToken):
+                    end_mark = self.peek_token().end_mark
+                    event = MappingStartEvent(anchor, tag, implicit,
+                            start_mark, end_mark, flow_style=True)
+                    self.state = self.parse_flow_mapping_first_key
+                elif block and self.check_token(BlockSequenceStartToken):
+                    end_mark = self.peek_token().start_mark
+                    event = SequenceStartEvent(anchor, tag, implicit,
+                            start_mark, end_mark, flow_style=False)
+                    self.state = self.parse_block_sequence_first_entry
+                elif block and self.check_token(BlockMappingStartToken):
+                    end_mark = self.peek_token().start_mark
+                    event = MappingStartEvent(anchor, tag, implicit,
+                            start_mark, end_mark, flow_style=False)
+                    self.state = self.parse_block_mapping_first_key
+                elif anchor is not None or tag is not None:
+                    # Empty scalars are allowed even if a tag or an anchor is
+                    # specified.
+                    event = ScalarEvent(anchor, tag, (implicit, False), u'',
+                            start_mark, end_mark)
+                    self.state = self.states.pop()
+                else:
+                    if block:
+                        node = 'block'
+                    else:
+                        node = 'flow'
+                    token = self.peek_token()
+                    raise ParserError("while parsing a %s node" % node, start_mark,
+                            "expected the node content, but found %r" % token.id,
+                            token.start_mark)
+        return event
+
+    # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+
+    def parse_block_sequence_first_entry(self):
+        token = self.get_token()
+        self.marks.append(token.start_mark)
+        return self.parse_block_sequence_entry()
+
+    def parse_block_sequence_entry(self):
+        if self.check_token(BlockEntryToken):
+            token = self.get_token()
+            if not self.check_token(BlockEntryToken, BlockEndToken):
+                self.states.append(self.parse_block_sequence_entry)
+                return self.parse_block_node()
+            else:
+                self.state = self.parse_block_sequence_entry
+                return self.process_empty_scalar(token.end_mark)
+        if not self.check_token(BlockEndToken):
+            token = self.peek_token()
+            raise ParserError("while parsing a block collection", self.marks[-1],
+                    "expected <block end>, but found %r" % token.id, token.start_mark)
+        token = self.get_token()
+        event = SequenceEndEvent(token.start_mark, token.end_mark)
+        self.state = self.states.pop()
+        self.marks.pop()
+        return event
+
+    # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
+
+    def parse_indentless_sequence_entry(self):
+        if self.check_token(BlockEntryToken):
+            token = self.get_token()
+            if not self.check_token(BlockEntryToken,
+                    KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_indentless_sequence_entry)
+                return self.parse_block_node()
+            else:
+                self.state = self.parse_indentless_sequence_entry
+                return self.process_empty_scalar(token.end_mark)
+        token = self.peek_token()
+        event = SequenceEndEvent(token.start_mark, token.start_mark)
+        self.state = self.states.pop()
+        return event
+
+    # block_mapping     ::= BLOCK-MAPPING_START
+    #                       ((KEY block_node_or_indentless_sequence?)?
+    #                       (VALUE block_node_or_indentless_sequence?)?)*
+    #                       BLOCK-END
+
+    def parse_block_mapping_first_key(self):
+        token = self.get_token()
+        self.marks.append(token.start_mark)
+        return self.parse_block_mapping_key()
+
+    def parse_block_mapping_key(self):
+        if self.check_token(KeyToken):
+            token = self.get_token()
+            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_block_mapping_value)
+                return self.parse_block_node_or_indentless_sequence()
+            else:
+                self.state = self.parse_block_mapping_value
+                return self.process_empty_scalar(token.end_mark)
+        if not self.check_token(BlockEndToken):
+            token = self.peek_token()
+            raise ParserError("while parsing a block mapping", self.marks[-1],
+                    "expected <block end>, but found %r" % token.id, token.start_mark)
+        token = self.get_token()
+        event = MappingEndEvent(token.start_mark, token.end_mark)
+        self.state = self.states.pop()
+        self.marks.pop()
+        return event
+
+    def parse_block_mapping_value(self):
+        if self.check_token(ValueToken):
+            token = self.get_token()
+            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_block_mapping_key)
+                return self.parse_block_node_or_indentless_sequence()
+            else:
+                self.state = self.parse_block_mapping_key
+                return self.process_empty_scalar(token.end_mark)
+        else:
+            self.state = self.parse_block_mapping_key
+            token = self.peek_token()
+            return self.process_empty_scalar(token.start_mark)
+
+    # flow_sequence     ::= FLOW-SEQUENCE-START
+    #                       (flow_sequence_entry FLOW-ENTRY)*
+    #                       flow_sequence_entry?
+    #                       FLOW-SEQUENCE-END
+    # flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+    #
+    # Note that while production rules for both flow_sequence_entry and
+    # flow_mapping_entry are equal, their interpretations are different.
+    # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
+    # generate an inline mapping (set syntax).
+
+    def parse_flow_sequence_first_entry(self):
+        token = self.get_token()
+        self.marks.append(token.start_mark)
+        return self.parse_flow_sequence_entry(first=True)
+
+    def parse_flow_sequence_entry(self, first=False):
+        if not self.check_token(FlowSequenceEndToken):
+            if not first:
+                if self.check_token(FlowEntryToken):
+                    self.get_token()
+                else:
+                    token = self.peek_token()
+                    raise ParserError("while parsing a flow sequence", self.marks[-1],
+                            "expected ',' or ']', but got %r" % token.id, token.start_mark)
+            
+            if self.check_token(KeyToken):
+                token = self.peek_token()
+                event = MappingStartEvent(None, None, True,
+                        token.start_mark, token.end_mark,
+                        flow_style=True)
+                self.state = self.parse_flow_sequence_entry_mapping_key
+                return event
+            elif not self.check_token(FlowSequenceEndToken):
+                self.states.append(self.parse_flow_sequence_entry)
+                return self.parse_flow_node()
+        token = self.get_token()
+        event = SequenceEndEvent(token.start_mark, token.end_mark)
+        self.state = self.states.pop()
+        self.marks.pop()
+        return event
+
+    def parse_flow_sequence_entry_mapping_key(self):
+        token = self.get_token()
+        if not self.check_token(ValueToken,
+                FlowEntryToken, FlowSequenceEndToken):
+            self.states.append(self.parse_flow_sequence_entry_mapping_value)
+            return self.parse_flow_node()
+        else:
+            self.state = self.parse_flow_sequence_entry_mapping_value
+            return self.process_empty_scalar(token.end_mark)
+
+    def parse_flow_sequence_entry_mapping_value(self):
+        if self.check_token(ValueToken):
+            token = self.get_token()
+            if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
+                self.states.append(self.parse_flow_sequence_entry_mapping_end)
+                return self.parse_flow_node()
+            else:
+                self.state = self.parse_flow_sequence_entry_mapping_end
+                return self.process_empty_scalar(token.end_mark)
+        else:
+            self.state = self.parse_flow_sequence_entry_mapping_end
+            token = self.peek_token()
+            return self.process_empty_scalar(token.start_mark)
+
+    def parse_flow_sequence_entry_mapping_end(self):
+        self.state = self.parse_flow_sequence_entry
+        token = self.peek_token()
+        return MappingEndEvent(token.start_mark, token.start_mark)
+
+    # flow_mapping  ::= FLOW-MAPPING-START
+    #                   (flow_mapping_entry FLOW-ENTRY)*
+    #                   flow_mapping_entry?
+    #                   FLOW-MAPPING-END
+    # flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+
+    def parse_flow_mapping_first_key(self):
+        token = self.get_token()
+        self.marks.append(token.start_mark)
+        return self.parse_flow_mapping_key(first=True)
+
+    def parse_flow_mapping_key(self, first=False):
+        if not self.check_token(FlowMappingEndToken):
+            if not first:
+                if self.check_token(FlowEntryToken):
+                    self.get_token()
+                else:
+                    token = self.peek_token()
+                    raise ParserError("while parsing a flow mapping", self.marks[-1],
+                            "expected ',' or '}', but got %r" % token.id, token.start_mark)
+            if self.check_token(KeyToken):
+                token = self.get_token()
+                if not self.check_token(ValueToken,
+                        FlowEntryToken, FlowMappingEndToken):
+                    self.states.append(self.parse_flow_mapping_value)
+                    return self.parse_flow_node()
+                else:
+                    self.state = self.parse_flow_mapping_value
+                    return self.process_empty_scalar(token.end_mark)
+            elif not self.check_token(FlowMappingEndToken):
+                self.states.append(self.parse_flow_mapping_empty_value)
+                return self.parse_flow_node()
+        token = self.get_token()
+        event = MappingEndEvent(token.start_mark, token.end_mark)
+        self.state = self.states.pop()
+        self.marks.pop()
+        return event
+
+    def parse_flow_mapping_value(self):
+        if self.check_token(ValueToken):
+            token = self.get_token()
+            if not self.check_token(FlowEntryToken, FlowMappingEndToken):
+                self.states.append(self.parse_flow_mapping_key)
+                return self.parse_flow_node()
+            else:
+                self.state = self.parse_flow_mapping_key
+                return self.process_empty_scalar(token.end_mark)
+        else:
+            self.state = self.parse_flow_mapping_key
+            token = self.peek_token()
+            return self.process_empty_scalar(token.start_mark)
+
+    def parse_flow_mapping_empty_value(self):
+        self.state = self.parse_flow_mapping_key
+        return self.process_empty_scalar(self.peek_token().start_mark)
+
+    def process_empty_scalar(self, mark):
+        return ScalarEvent(None, None, (True, False), u'', mark, mark)
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/80e25b46/tools/bin/ext/yaml/reader.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/reader.py b/tools/bin/ext/yaml/reader.py
new file mode 100644
index 0000000..f36e647
--- /dev/null
+++ b/tools/bin/ext/yaml/reader.py
@@ -0,0 +1,241 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# This module contains abstractions for the input stream. You don't have to
+# looks further, there are no pretty code.
+#
+# We define two classes here.
+#
+#   Mark(source, line, column)
+# It's just a record and its only use is producing nice error messages.
+# Parser does not use it for any other purposes.
+#
+#   Reader(source, data)
+# Reader determines the encoding of `data` and converts it to unicode.
+# Reader provides the following methods and attributes:
+#   reader.peek(length=1) - return the next `length` characters
+#   reader.forward(length=1) - move the current position to `length` characters.
+#   reader.index - the number of the current character.
+#   reader.line, stream.column - the line and the column of the current character.
+
+__all__ = ['Reader', 'ReaderError']
+
+from error import YAMLError, Mark
+
+import codecs, re
+
+# Unfortunately, codec functions in Python 2.3 does not support the `finish`
+# arguments, so we have to write our own wrappers.
+
+try:
+    codecs.utf_8_decode('', 'strict', False)
+    from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
+
+except TypeError:
+
+    def utf_16_le_decode(data, errors, finish=False):
+        if not finish and len(data) % 2 == 1:
+            data = data[:-1]
+        return codecs.utf_16_le_decode(data, errors)
+
+    def utf_16_be_decode(data, errors, finish=False):
+        if not finish and len(data) % 2 == 1:
+            data = data[:-1]
+        return codecs.utf_16_be_decode(data, errors)
+
+    def utf_8_decode(data, errors, finish=False):
+        if not finish:
+            # We are trying to remove a possible incomplete multibyte character
+            # from the suffix of the data.
+            # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
+            # All further bytes are in the range 0x80 to 0xbf.
+            # UTF-8 encoded UCS characters may be up to six bytes long.
+            count = 0
+            while count < 5 and count < len(data)   \
+                    and '\x80' <= data[-count-1] <= '\xBF':
+                count -= 1
+            if count < 5 and count < len(data)  \
+                    and '\xC0' <= data[-count-1] <= '\xFD':
+                data = data[:-count-1]
+        return codecs.utf_8_decode(data, errors)
+
+class ReaderError(YAMLError):
+
+    def __init__(self, name, position, character, encoding, reason):
+        self.name = name
+        self.character = character
+        self.position = position
+        self.encoding = encoding
+        self.reason = reason
+
+    def __str__(self):
+        if isinstance(self.character, str):
+            return "'%s' codec can't decode byte #x%02x: %s\n"  \
+                    "  in \"%s\", position %d"    \
+                    % (self.encoding, ord(self.character), self.reason,
+                            self.name, self.position)
+        else:
+            return "unacceptable character #x%04x: %s\n"    \
+                    "  in \"%s\", position %d"    \
+                    % (ord(self.character), self.reason,
+                            self.name, self.position)
+
+class Reader(object):
+    # Reader:
+    # - determines the data encoding and converts it to unicode,
+    # - checks if characters are in allowed range,
+    # - adds '\0' to the end.
+
+    # Reader accepts
+    #  - a `str` object,
+    #  - a `unicode` object,
+    #  - a file-like object with its `read` method returning `str`,
+    #  - a file-like object with its `read` method returning `unicode`.
+
+    # Yeah, it's ugly and slow.
+
+    def __init__(self, stream):
+        self.name = None
+        self.stream = None
+        self.stream_pointer = 0
+        self.eof = True
+        self.buffer = u''
+        self.pointer = 0
+        self.raw_buffer = None
+        self.raw_decode = None
+        self.encoding = None
+        self.index = 0
+        self.line = 0
+        self.column = 0
+        if isinstance(stream, unicode):
+            self.name = "<unicode string>"
+            self.check_printable(stream)
+            self.buffer = stream+u'\0'
+        elif isinstance(stream, str):
+            self.name = "<string>"
+            self.raw_buffer = stream
+            self.determine_encoding()
+        else:
+            self.stream = stream
+            self.name = getattr(stream, 'name', "<file>")
+            self.eof = False
+            self.raw_buffer = ''
+            self.determine_encoding()
+
+    def peek(self, index=0):
+        try:
+            return self.buffer[self.pointer+index]
+        except IndexError:
+            self.update(index+1)
+            return self.buffer[self.pointer+index]
+
+    def prefix(self, length=1):
+        if self.pointer+length >= len(self.buffer):
+            self.update(length)
+        return self.buffer[self.pointer:self.pointer+length]
+
+    def forward(self, length=1):
+        if self.pointer+length+1 >= len(self.buffer):
+            self.update(length+1)
+        while length:
+            ch = self.buffer[self.pointer]
+            self.pointer += 1
+            self.index += 1
+            if ch in u'\n\x85\u2028\u2029'  \
+                    or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
+                self.line += 1
+                self.column = 0
+            elif ch != u'\uFEFF':
+                self.column += 1
+            length -= 1
+
+    def get_mark(self):
+        if self.stream is None:
+            return Mark(self.name, self.index, self.line, self.column,
+                    self.buffer, self.pointer)
+        else:
+            return Mark(self.name, self.index, self.line, self.column,
+                    None, None)
+
+    def determine_encoding(self):
+        while not self.eof and len(self.raw_buffer) < 2:
+            self.update_raw()
+        if not isinstance(self.raw_buffer, unicode):
+            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
+                self.raw_decode = utf_16_le_decode
+                self.encoding = 'utf-16-le'
+            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
+                self.raw_decode = utf_16_be_decode
+                self.encoding = 'utf-16-be'
+            else:
+                self.raw_decode = utf_8_decode
+                self.encoding = 'utf-8'
+        self.update(1)
+
+    NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+    def check_printable(self, data):
+        match = self.NON_PRINTABLE.search(data)
+        if match:
+            character = match.group()
+            position = self.index+(len(self.buffer)-self.pointer)+match.start()
+            raise ReaderError(self.name, position, character,
+                    'unicode', "special characters are not allowed")
+
+    def update(self, length):
+        if self.raw_buffer is None:
+            return
+        self.buffer = self.buffer[self.pointer:]
+        self.pointer = 0
+        while len(self.buffer) < length:
+            if not self.eof:
+                self.update_raw()
+            if self.raw_decode is not None:
+                try:
+                    data, converted = self.raw_decode(self.raw_buffer,
+                            'strict', self.eof)
+                except UnicodeDecodeError, exc:
+                    character = exc.object[exc.start]
+                    if self.stream is not None:
+                        position = self.stream_pointer-len(self.raw_buffer)+exc.start
+                    else:
+                        position = exc.start
+                    raise ReaderError(self.name, position, character,
+                            exc.encoding, exc.reason)
+            else:
+                data = self.raw_buffer
+                converted = len(data)
+            self.check_printable(data)
+            self.buffer += data
+            self.raw_buffer = self.raw_buffer[converted:]
+            if self.eof:
+                self.buffer += u'\0'
+                self.raw_buffer = None
+                break
+
+    def update_raw(self, size=1024):
+        data = self.stream.read(size)
+        if data:
+            self.raw_buffer += data
+            self.stream_pointer += len(data)
+        else:
+            self.eof = True
+
+#try:
+#    import psyco
+#    psyco.bind(Reader)
+#except ImportError:
+#    pass
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/80e25b46/tools/bin/ext/yaml/representer.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/representer.py b/tools/bin/ext/yaml/representer.py
new file mode 100644
index 0000000..b83cc4b
--- /dev/null
+++ b/tools/bin/ext/yaml/representer.py
@@ -0,0 +1,504 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
+    'RepresenterError']
+
+from error import *
+from nodes import *
+
+import datetime
+
+try:
+    set
+except NameError:
+    from sets import Set as set
+
+import sys, copy_reg, types
+
+class RepresenterError(YAMLError):
+    pass
+
+class BaseRepresenter(object):
+
+    yaml_representers = {}
+    yaml_multi_representers = {}
+
+    def __init__(self, default_style=None, default_flow_style=None):
+        self.default_style = default_style
+        self.default_flow_style = default_flow_style
+        self.represented_objects = {}
+        self.object_keeper = []
+        self.alias_key = None
+
+    def represent(self, data):
+        node = self.represent_data(data)
+        self.serialize(node)
+        self.represented_objects = {}
+        self.object_keeper = []
+        self.alias_key = None
+
+    def get_classobj_bases(self, cls):
+        bases = [cls]
+        for base in cls.__bases__:
+            bases.extend(self.get_classobj_bases(base))
+        return bases
+
+    def represent_data(self, data):
+        if self.ignore_aliases(data):
+            self.alias_key = None
+        else:
+            self.alias_key = id(data)
+        if self.alias_key is not None:
+            if self.alias_key in self.represented_objects:
+                node = self.represented_objects[self.alias_key]
+                #if node is None:
+                #    raise RepresenterError("recursive objects are not allowed: %r" % data)
+                return node
+            #self.represented_objects[alias_key] = None
+            self.object_keeper.append(data)
+        data_types = type(data).__mro__
+        if type(data) is types.InstanceType:
+            data_types = self.get_classobj_bases(data.__class__)+list(data_types)
+        if data_types[0] in self.yaml_representers:
+            node = self.yaml_representers[data_types[0]](self, data)
+        else:
+            for data_type in data_types:
+                if data_type in self.yaml_multi_representers:
+                    node = self.yaml_multi_representers[data_type](self, data)
+                    break
+            else:
+                if None in self.yaml_multi_representers:
+                    node = self.yaml_multi_representers[None](self, data)
+                elif None in self.yaml_representers:
+                    node = self.yaml_representers[None](self, data)
+                else:
+                    node = ScalarNode(None, unicode(data))
+        #if alias_key is not None:
+        #    self.represented_objects[alias_key] = node
+        return node
+
+    def add_representer(cls, data_type, representer):
+        if not 'yaml_representers' in cls.__dict__:
+            cls.yaml_representers = cls.yaml_representers.copy()
+        cls.yaml_representers[data_type] = representer
+    add_representer = classmethod(add_representer)
+
+    def add_multi_representer(cls, data_type, representer):
+        if not 'yaml_multi_representers' in cls.__dict__:
+            cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
+        cls.yaml_multi_representers[data_type] = representer
+    add_multi_representer = classmethod(add_multi_representer)
+
+    def represent_scalar(self, tag, value, style=None):
+        if style is None:
+            style = self.default_style
+        node = ScalarNode(tag, value, style=style)
+        if self.alias_key is not None:
+            self.represented_objects[self.alias_key] = node
+        return node
+
+    def represent_sequence(self, tag, sequence, flow_style=None):
+        value = []
+        node = SequenceNode(tag, value, flow_style=flow_style)
+        if self.alias_key is not None:
+            self.represented_objects[self.alias_key] = node
+        best_style = True
+        for item in sequence:
+            node_item = self.represent_data(item)
+            if not (isinstance(node_item, ScalarNode) and not node_item.style):
+                best_style = False
+            value.append(node_item)
+        if flow_style is None:
+            if self.default_flow_style is not None:
+                node.flow_style = self.default_flow_style
+            else:
+                node.flow_style = best_style
+        return node
+
+    def represent_mapping(self, tag, mapping, flow_style=None):
+        value = []
+        node = MappingNode(tag, value, flow_style=flow_style)
+        if self.alias_key is not None:
+            self.represented_objects[self.alias_key] = node
+        best_style = True
+        if hasattr(mapping, 'items'):
+            mapping = mapping.items()
+            mapping.sort()
+        for item_key, item_value in mapping:
+            node_key = self.represent_data(item_key)
+            node_value = self.represent_data(item_value)
+            if not (isinstance(node_key, ScalarNode) and not node_key.style):
+                best_style = False
+            if not (isinstance(node_value, ScalarNode) and not node_value.style):
+                best_style = False
+            value.append((node_key, node_value))
+        if flow_style is None:
+            if self.default_flow_style is not None:
+                node.flow_style = self.default_flow_style
+            else:
+                node.flow_style = best_style
+        return node
+
+    def ignore_aliases(self, data):
+        return False
+
+class SafeRepresenter(BaseRepresenter):
+
+    def ignore_aliases(self, data):
+        if data in [None, ()]:
+            return True
+        if isinstance(data, (str, unicode, bool, int, float)):
+            return True
+
+    def represent_none(self, data):
+        return self.represent_scalar(u'tag:yaml.org,2002:null',
+                u'null')
+
+    def represent_str(self, data):
+        tag = None
+        style = None
+        try:
+            data = unicode(data, 'ascii')
+            tag = u'tag:yaml.org,2002:str'
+        except UnicodeDecodeError:
+            try:
+                data = unicode(data, 'utf-8')
+                tag = u'tag:yaml.org,2002:str'
+            except UnicodeDecodeError:
+                data = data.encode('base64')
+                tag = u'tag:yaml.org,2002:binary'
+                style = '|'
+        return self.represent_scalar(tag, data, style=style)
+
+    def represent_unicode(self, data):
+        return self.represent_scalar(u'tag:yaml.org,2002:str', data)
+
+    def represent_bool(self, data):
+        if data:
+            value = u'true'
+        else:
+            value = u'false'
+        return self.represent_scalar(u'tag:yaml.org,2002:bool', value)
+
+    def represent_int(self, data):
+        return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
+
+    def represent_long(self, data):
+        return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
+
+    inf_value = 1e300
+    while repr(inf_value) != repr(inf_value*inf_value):
+        inf_value *= inf_value
+
+    def represent_float(self, data):
+        if data != data or (data == 0.0 and data == 1.0):
+            value = u'.nan'
+        elif data == self.inf_value:
+            value = u'.inf'
+        elif data == -self.inf_value:
+            value = u'-.inf'
+        else:
+            value = unicode(repr(data)).lower()
+            # Note that in some cases `repr(data)` represents a float number
+            # without the decimal parts.  For instance:
+            #   >>> repr(1e17)
+            #   '1e17'
+            # Unfortunately, this is not a valid float representation according
+            # to the definition of the `!!float` tag.  We fix this by adding
+            # '.0' before the 'e' symbol.
+            if u'.' not in value and u'e' in value:
+                value = value.replace(u'e', u'.0e', 1)
+        return self.represent_scalar(u'tag:yaml.org,2002:float', value)
+
+    def represent_list(self, data):
+        #pairs = (len(data) > 0 and isinstance(data, list))
+        #if pairs:
+        #    for item in data:
+        #        if not isinstance(item, tuple) or len(item) != 2:
+        #            pairs = False
+        #            break
+        #if not pairs:
+            return self.represent_sequence(u'tag:yaml.org,2002:seq', data)
+        #value = []
+        #for item_key, item_value in data:
+        #    value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
+        #        [(item_key, item_value)]))
+        #return SequenceNode(u'tag:yaml.org,2002:pairs', value)
+
+    def represent_dict(self, data):
+        return self.represent_mapping(u'tag:yaml.org,2002:map', data)
+
+    def represent_set(self, data):
+        value = {}
+        for key in data:
+            value[key] = None
+        return self.represent_mapping(u'tag:yaml.org,2002:set', value)
+
+    def represent_date(self, data):
+        value = unicode(data.isoformat())
+        return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
+
+    def represent_datetime(self, data):
+        value = unicode(data.isoformat(' '))
+        return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
+
+    def represent_yaml_object(self, tag, data, cls, flow_style=None):
+        if hasattr(data, '__getstate__'):
+            state = data.__getstate__()
+        else:
+            state = data.__dict__.copy()
+        return self.represent_mapping(tag, state, flow_style=flow_style)
+
+    def represent_undefined(self, data):
+        raise RepresenterError("cannot represent an object: %s" % data)
+
+SafeRepresenter.add_representer(type(None),
+        SafeRepresenter.represent_none)
+
+SafeRepresenter.add_representer(str,
+        SafeRepresenter.represent_str)
+
+SafeRepresenter.add_representer(unicode,
+        SafeRepresenter.represent_unicode)
+
+SafeRepresenter.add_representer(bool,
+        SafeRepresenter.represent_bool)
+
+SafeRepresenter.add_representer(int,
+        SafeRepresenter.represent_int)
+
+SafeRepresenter.add_representer(long,
+        SafeRepresenter.represent_long)
+
+SafeRepresenter.add_representer(float,
+        SafeRepresenter.represent_float)
+
+SafeRepresenter.add_representer(list,
+        SafeRepresenter.represent_list)
+
+SafeRepresenter.add_representer(tuple,
+        SafeRepresenter.represent_list)
+
+SafeRepresenter.add_representer(dict,
+        SafeRepresenter.represent_dict)
+
+SafeRepresenter.add_representer(set,
+        SafeRepresenter.represent_set)
+
+SafeRepresenter.add_representer(datetime.date,
+        SafeRepresenter.represent_date)
+SafeRepresenter.add_representer(datetime.datetime,
+        SafeRepresenter.represent_datetime)
+
+SafeRepresenter.add_representer(None,
+        SafeRepresenter.represent_undefined)
+
+class Representer(SafeRepresenter):
+
+    def represent_str(self, data):
+        tag = None
+        style = None
+        try:
+            data = unicode(data, 'ascii')
+            tag = u'tag:yaml.org,2002:str'
+        except UnicodeDecodeError:
+            try:
+                data = unicode(data, 'utf-8')
+                tag = u'tag:yaml.org,2002:python/str'
+            except UnicodeDecodeError:
+                data = data.encode('base64')
+                tag = u'tag:yaml.org,2002:binary'
+                style = '|'
+        return self.represent_scalar(tag, data, style=style)
+
+    def represent_unicode(self, data):
+        tag = None
+        try:
+            data.encode('ascii')
+            tag = u'tag:yaml.org,2002:python/unicode'
+        except UnicodeEncodeError:
+            tag = u'tag:yaml.org,2002:str'
+        return self.represent_scalar(tag, data)
+
+    def represent_long(self, data):
+        tag = u'tag:yaml.org,2002:int'
+        if int(data) is not data:
+            tag = u'tag:yaml.org,2002:python/long'
+        return self.represent_scalar(tag, unicode(data))
+
+    def represent_complex(self, data):
+        if data.imag == 0.0:
+            data = u'%r' % data.real
+        elif data.real == 0.0:
+            data = u'%rj' % data.imag
+        elif data.imag > 0:
+            data = u'%r+%rj' % (data.real, data.imag)
+        else:
+            data = u'%r%rj' % (data.real, data.imag)
+        return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data)
+
+    def represent_tuple(self, data):
+        return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data)
+
+    def represent_name(self, data):
+        name = u'%s.%s' % (data.__module__, data.__name__)
+        return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'')
+
+    def represent_module(self, data):
+        return self.represent_scalar(
+                u'tag:yaml.org,2002:python/module:'+data.__name__, u'')
+
+    def represent_instance(self, data):
+        # For instances of classic classes, we use __getinitargs__ and
+        # __getstate__ to serialize the data.
+
+        # If data.__getinitargs__ exists, the object must be reconstructed by
+        # calling cls(**args), where args is a tuple returned by
+        # __getinitargs__. Otherwise, the cls.__init__ method should never be
+        # called and the class instance is created by instantiating a trivial
+        # class and assigning to the instance's __class__ variable.
+
+        # If data.__getstate__ exists, it returns the state of the object.
+        # Otherwise, the state of the object is data.__dict__.
+
+        # We produce either a !!python/object or !!python/object/new node.
+        # If data.__getinitargs__ does not exist and state is a dictionary, we
+        # produce a !!python/object node . Otherwise we produce a
+        # !!python/object/new node.
+
+        cls = data.__class__
+        class_name = u'%s.%s' % (cls.__module__, cls.__name__)
+        args = None
+        state = None
+        if hasattr(data, '__getinitargs__'):
+            args = list(data.__getinitargs__())
+        if hasattr(data, '__getstate__'):
+            state = data.__getstate__()
+        else:
+            state = data.__dict__
+        if args is None and isinstance(state, dict):
+            return self.represent_mapping(
+                    u'tag:yaml.org,2002:python/object:'+class_name, state)
+        if isinstance(state, dict) and not state:
+            return self.represent_sequence(
+                    u'tag:yaml.org,2002:python/object/new:'+class_name, args)
+        value = {}
+        if args:
+            value['args'] = args
+        value['state'] = state
+        return self.represent_mapping(
+                u'tag:yaml.org,2002:python/object/new:'+class_name, value)
+
+    def represent_object(self, data):
+        # We use __reduce__ API to save the data. data.__reduce__ returns
+        # a tuple of length 2-5:
+        #   (function, args, state, listitems, dictitems)
+
+        # For reconstructing, we calls function(*args), then set its state,
+        # listitems, and dictitems if they are not None.
+
+        # A special case is when function.__name__ == '__newobj__'. In this
+        # case we create the object with args[0].__new__(*args).
+
+        # Another special case is when __reduce__ returns a string - we don't
+        # support it.
+
+        # We produce a !!python/object, !!python/object/new or
+        # !!python/object/apply node.
+
+        cls = type(data)
+        if cls in copy_reg.dispatch_table:
+            reduce = copy_reg.dispatch_table[cls](data)
+        elif hasattr(data, '__reduce_ex__'):
+            reduce = data.__reduce_ex__(2)
+        elif hasattr(data, '__reduce__'):
+            reduce = data.__reduce__()
+        else:
+            raise RepresenterError("cannot represent object: %r" % data)
+        reduce = (list(reduce)+[None]*5)[:5]
+        function, args, state, listitems, dictitems = reduce
+        args = list(args)
+        if state is None:
+            state = {}
+        if listitems is not None:
+            listitems = list(listitems)
+        if dictitems is not None:
+            dictitems = dict(dictitems)
+        if function.__name__ == '__newobj__':
+            function = args[0]
+            args = args[1:]
+            tag = u'tag:yaml.org,2002:python/object/new:'
+            newobj = True
+        else:
+            tag = u'tag:yaml.org,2002:python/object/apply:'
+            newobj = False
+        function_name = u'%s.%s' % (function.__module__, function.__name__)
+        if not args and not listitems and not dictitems \
+                and isinstance(state, dict) and newobj:
+            return self.represent_mapping(
+                    u'tag:yaml.org,2002:python/object:'+function_name, state)
+        if not listitems and not dictitems  \
+                and isinstance(state, dict) and not state:
+            return self.represent_sequence(tag+function_name, args)
+        value = {}
+        if args:
+            value['args'] = args
+        if state or not isinstance(state, dict):
+            value['state'] = state
+        if listitems:
+            value['listitems'] = listitems
+        if dictitems:
+            value['dictitems'] = dictitems
+        return self.represent_mapping(tag+function_name, value)
+
+Representer.add_representer(str,
+        Representer.represent_str)
+
+Representer.add_representer(unicode,
+        Representer.represent_unicode)
+
+Representer.add_representer(long,
+        Representer.represent_long)
+
+Representer.add_representer(complex,
+        Representer.represent_complex)
+
+Representer.add_representer(tuple,
+        Representer.represent_tuple)
+
+Representer.add_representer(type,
+        Representer.represent_name)
+
+Representer.add_representer(types.ClassType,
+        Representer.represent_name)
+
+Representer.add_representer(types.FunctionType,
+        Representer.represent_name)
+
+Representer.add_representer(types.BuiltinFunctionType,
+        Representer.represent_name)
+
+Representer.add_representer(types.ModuleType,
+        Representer.represent_module)
+
+Representer.add_multi_representer(types.InstanceType,
+        Representer.represent_instance)
+
+Representer.add_multi_representer(object,
+        Representer.represent_object)
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/80e25b46/tools/bin/ext/yaml/resolver.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/resolver.py b/tools/bin/ext/yaml/resolver.py
new file mode 100644
index 0000000..0feb12b
--- /dev/null
+++ b/tools/bin/ext/yaml/resolver.py
@@ -0,0 +1,239 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+__all__ = ['BaseResolver', 'Resolver']
+
+from error import *
+from nodes import *
+
+import re
+
+class ResolverError(YAMLError):
+    pass
+
+class BaseResolver(object):
+
+    DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
+    DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
+    DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
+
+    yaml_implicit_resolvers = {}
+    yaml_path_resolvers = {}
+
+    def __init__(self):
+        self.resolver_exact_paths = []
+        self.resolver_prefix_paths = []
+
+    def add_implicit_resolver(cls, tag, regexp, first):
+        if not 'yaml_implicit_resolvers' in cls.__dict__:
+            cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
+        if first is None:
+            first = [None]
+        for ch in first:
+            cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
+    add_implicit_resolver = classmethod(add_implicit_resolver)
+
+    def add_path_resolver(cls, tag, path, kind=None):
+        # Note: `add_path_resolver` is experimental.  The API could be changed.
+        # `new_path` is a pattern that is matched against the path from the
+        # root to the node that is being considered.  `node_path` elements are
+        # tuples `(node_check, index_check)`.  `node_check` is a node class:
+        # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`.  `None`
+        # matches any kind of a node.  `index_check` could be `None`, a boolean
+        # value, a string value, or a number.  `None` and `False` match against
+        # any _value_ of sequence and mapping nodes.  `True` matches against
+        # any _key_ of a mapping node.  A string `index_check` matches against
+        # a mapping value that corresponds to a scalar key which content is
+        # equal to the `index_check` value.  An integer `index_check` matches
+        # against a sequence value with the index equal to `index_check`.
+        if not 'yaml_path_resolvers' in cls.__dict__:
+            cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
+        new_path = []
+        for element in path:
+            if isinstance(element, (list, tuple)):
+                if len(element) == 2:
+                    node_check, index_check = element
+                elif len(element) == 1:
+                    node_check = element[0]
+                    index_check = True
+                else:
+                    raise ResolverError("Invalid path element: %s" % element)
+            else:
+                node_check = None
+                index_check = element
+            if node_check is str:
+                node_check = ScalarNode
+            elif node_check is list:
+                node_check = SequenceNode
+            elif node_check is dict:
+                node_check = MappingNode
+            elif node_check not in [ScalarNode, SequenceNode, MappingNode]  \
+                    and not isinstance(node_check, basestring)  \
+                    and node_check is not None:
+                raise ResolverError("Invalid node checker: %s" % node_check)
+            if not isinstance(index_check, (basestring, int))   \
+                    and index_check is not None:
+                raise ResolverError("Invalid index checker: %s" % index_check)
+            new_path.append((node_check, index_check))
+        if kind is str:
+            kind = ScalarNode
+        elif kind is list:
+            kind = SequenceNode
+        elif kind is dict:
+            kind = MappingNode
+        elif kind not in [ScalarNode, SequenceNode, MappingNode]    \
+                and kind is not None:
+            raise ResolverError("Invalid node kind: %s" % kind)
+        cls.yaml_path_resolvers[tuple(new_path), kind] = tag
+    add_path_resolver = classmethod(add_path_resolver)
+
+    def descend_resolver(self, current_node, current_index):
+        if not self.yaml_path_resolvers:
+            return
+        exact_paths = {}
+        prefix_paths = []
+        if current_node:
+            depth = len(self.resolver_prefix_paths)
+            for path, kind in self.resolver_prefix_paths[-1]:
+                if self.check_resolver_prefix(depth, path, kind,
+                        current_node, current_index):
+                    if len(path) > depth:
+                        prefix_paths.append((path, kind))
+                    else:
+                        exact_paths[kind] = self.yaml_path_resolvers[path, kind]
+        else:
+            for path, kind in self.yaml_path_resolvers:
+                if not path:
+                    exact_paths[kind] = self.yaml_path_resolvers[path, kind]
+                else:
+                    prefix_paths.append((path, kind))
+        self.resolver_exact_paths.append(exact_paths)
+        self.resolver_prefix_paths.append(prefix_paths)
+
+    def ascend_resolver(self):
+        if not self.yaml_path_resolvers:
+            return
+        self.resolver_exact_paths.pop()
+        self.resolver_prefix_paths.pop()
+
+    def check_resolver_prefix(self, depth, path, kind,
+            current_node, current_index):
+        node_check, index_check = path[depth-1]
+        if isinstance(node_check, basestring):
+            if current_node.tag != node_check:
+                return
+        elif node_check is not None:
+            if not isinstance(current_node, node_check):
+                return
+        if index_check is True and current_index is not None:
+            return
+        if (index_check is False or index_check is None)    \
+                and current_index is None:
+            return
+        if isinstance(index_check, basestring):
+            if not (isinstance(current_index, ScalarNode)
+                    and index_check == current_index.value):
+                return
+        elif isinstance(index_check, int) and not isinstance(index_check, bool):
+            if index_check != current_index:
+                return
+        return True
+
+    def resolve(self, kind, value, implicit):
+        if kind is ScalarNode and implicit[0]:
+            if value == u'':
+                resolvers = self.yaml_implicit_resolvers.get(u'', [])
+            else:
+                resolvers = self.yaml_implicit_resolvers.get(value[0], [])
+            resolvers += self.yaml_implicit_resolvers.get(None, [])
+            for tag, regexp in resolvers:
+                if regexp.match(value):
+                    return tag
+            implicit = implicit[1]
+        if self.yaml_path_resolvers:
+            exact_paths = self.resolver_exact_paths[-1]
+            if kind in exact_paths:
+                return exact_paths[kind]
+            if None in exact_paths:
+                return exact_paths[None]
+        if kind is ScalarNode:
+            return self.DEFAULT_SCALAR_TAG
+        elif kind is SequenceNode:
+            return self.DEFAULT_SEQUENCE_TAG
+        elif kind is MappingNode:
+            return self.DEFAULT_MAPPING_TAG
+
+class Resolver(BaseResolver):
+    pass
+
+Resolver.add_implicit_resolver(
+        u'tag:yaml.org,2002:bool',
+        re.compile(ur'''^(?:yes|Yes|YES|no|No|NO
+                    |true|True|TRUE|false|False|FALSE
+                    |on|On|ON|off|Off|OFF)$''', re.X),
+        list(u'yYnNtTfFoO'))
+
+Resolver.add_implicit_resolver(
+        u'tag:yaml.org,2002:float',
+        re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)?\.[0-9_]*(?:[eE][-+][0-9]+)?
+                    |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
+                    |[-+]?\.(?:inf|Inf|INF)
+                    |\.(?:nan|NaN|NAN))$''', re.X),
+        list(u'-+0123456789.'))
+
+Resolver.add_implicit_resolver(
+        u'tag:yaml.org,2002:int',
+        re.compile(ur'''^(?:[-+]?0b[0-1_]+
+                    |[-+]?0[0-7_]+
+                    |[-+]?(?:0|[1-9][0-9_]*)
+                    |[-+]?0x[0-9a-fA-F_]+
+                    |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
+        list(u'-+0123456789'))
+
+Resolver.add_implicit_resolver(
+        u'tag:yaml.org,2002:merge',
+        re.compile(ur'^(?:<<)$'),
+        ['<'])
+
+Resolver.add_implicit_resolver(
+        u'tag:yaml.org,2002:null',
+        re.compile(ur'''^(?: ~
+                    |null|Null|NULL
+                    | )$''', re.X),
+        [u'~', u'n', u'N', u''])
+
+Resolver.add_implicit_resolver(
+        u'tag:yaml.org,2002:timestamp',
+        re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
+                    |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
+                     (?:[Tt]|[ \t]+)[0-9][0-9]?
+                     :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
+                     (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
+        list(u'0123456789'))
+
+Resolver.add_implicit_resolver(
+        u'tag:yaml.org,2002:value',
+        re.compile(ur'^(?:=)$'),
+        ['='])
+
+# The following resolver is only for documentation purposes. It cannot work
+# because plain scalars cannot start with '!', '&', or '*'.
+Resolver.add_implicit_resolver(
+        u'tag:yaml.org,2002:yaml',
+        re.compile(ur'^(?:!|&|\*)$'),
+        list(u'!&*'))
+


Mime
View raw message