corinthia-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pmke...@apache.org
Subject [2/2] incubator-corinthia git commit: Flat: Build Grammar object from parsed PEG file
Date Fri, 17 Jul 2015 08:19:27 GMT
Flat: Build Grammar object from parsed PEG file

Introduce the ability to construct a new Grammar object from the results
of parsing a grammar file in the PEG syntax. This allows us to write
custom grammars to support arbitrary text-based file formats.

The grammar building process works by traversing a tree of Term objects
produced as a result of a parse. As each instance of the Definition
production is constructed, an Expression object is created for its body
based on the corresponding portion of the parse tree, and a new definition is
added to the Grammar object being built.

The resulting grammar can then be used to parse a file in the syntax
defined by that grammar - though this is not yet supported as a
command-line option.

Currently, a large number of assert statements are used when processing
the Term tree. These sanity checks ensure that the terms match what we
expect from the grammar. It is anticipated that in the future it will be
possible to write processing code in a more type-safe manner, both to a
limited extent in C, and using a dedicated tree-transformation language
to be integrated with the parsing process. The current parser is
essentially an interpreter which we can extend with additional language
constructs to support this transformation.


Project: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/commit/1707a712
Tree: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/tree/1707a712
Diff: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/diff/1707a712

Branch: refs/heads/master
Commit: 1707a7125eef6cf208ea17f1887b6e669ce5f8eb
Parents: c6a23ee
Author: Peter Kelly <peter@uxproductivity.com>
Authored: Fri Jul 17 15:09:34 2015 +0700
Committer: Peter Kelly <peter@uxproductivity.com>
Committed: Fri Jul 17 15:14:16 2015 +0700

----------------------------------------------------------------------
 experiments/flat/src/BuildGrammar.c | 505 +++++++++++++++++++++++++++++++
 experiments/flat/src/BuildGrammar.h |  23 ++
 experiments/flat/src/CMakeLists.txt |   2 +
 experiments/flat/src/Term.c         |  47 +++
 experiments/flat/src/Term.h         |   8 +
 experiments/flat/src/flat.c         |  41 ++-
 6 files changed, 623 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/1707a712/experiments/flat/src/BuildGrammar.c
----------------------------------------------------------------------
diff --git a/experiments/flat/src/BuildGrammar.c b/experiments/flat/src/BuildGrammar.c
new file mode 100644
index 0000000..fd3a169
--- /dev/null
+++ b/experiments/flat/src/BuildGrammar.c
@@ -0,0 +1,505 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "Common.h"
+#include "BuildGrammar.h"
+#include "Util.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#define MAX_CHILDREN 64
+
+typedef struct Builder Builder;
+
+struct Builder {
+    Grammar *gram;
+    const char *input;
+    int len;
+};
+
+static Expression *buildExpression(Builder *builder, Term *term);
+
+static char *unescapeLiteral(const char *escaped)
+{
+    size_t escapedLen = strlen(escaped);
+    size_t escapedPos = 0;
+    size_t unescapedLen = 0;
+    char *unescaped = (char *)malloc(escapedLen+1);
+
+    while (escapedPos < escapedLen) {
+        char c = escaped[escapedPos++];
+        if ((c == '\\') && (escapedPos < escapedLen)) {
+            c = escaped[escapedPos++];
+            switch (c) {
+                case 'n':
+                    unescaped[unescapedLen++] = '\n';
+                    break;
+                case 'r':
+                    unescaped[unescapedLen++] = '\r';
+                    break;
+                case 't':
+                    unescaped[unescapedLen++] = '\t';
+                    break;
+                default:
+                    unescaped[unescapedLen++] = c;
+                    break;
+            }
+        }
+        else {
+            unescaped[unescapedLen++] = c;
+        }
+    }
+
+    unescaped[unescapedLen] = '\0';
+    return unescaped;
+}
+
+// Sanity checking functions
+//
+// These are just used to verify that a Term we're about to use is of the expected type.
+// Since the format of the parse tree is likely to change in these early stages of development,
+// this will catch cases where the code in this file has not been adapted to those changes
+//
+// The large number of assert statements in this file, most of which use these functions, will
+// likely only be necessary in the short term. Ideally we should be able to get to a point where
+// one can safely write code that consumes a parse tree without having so many sanity checks.
+
+static int isTerm(Term *term, ExprKind kind, int count)
+{
+    return ((TermKind(term) == kind) && (TermCount(term) == count));
+}
+
+static int isSequence(Term *term, int count)
+{
+    return isTerm(term,SequenceExpr,count);
+}
+
+static int isIdent(Term *term, const char *name)
+{
+    return (isTerm(term,IdentExpr,1) && !strcmp(name,ExprIdentValue(TermType(term))));
+}
+
+// Extract a substring of the input based on a specific term, or one or more children of a term.
+//
+// Each Term object has a start and end field, which specify which portion of the input string
+// the term covers (that is, the portion of the input consumed during parsing of that term). In
+// some cases we want all the text matched by a term, and in others we only want the text matched
+// by specific children. An example of the latter is a "pseudo-terminal" like Identifier where
+// we want all characters except the trailing whitespace.
+
+static char *termString(Builder *builder, Term *term)
+{
+    assert(term->start >= 0);
+    assert(term->end <= builder->len);
+    int len = term->end - term->start;
+    char *str = malloc(len+1);
+    memcpy(str,&builder->input[term->start],len);
+    str[len] = '\0';
+    return str;
+}
+
+static char *termChildrenString(Builder *builder, Term *term, int firstIndex, int lastIndex)
+{
+    Term *firstChild = TermChildAt(term,firstIndex);
+    Term *lastChild = TermChildAt(term,lastIndex);
+    int start = firstChild->start;
+    int end = lastChild->end;
+
+    assert(start >= 0);
+    assert(end <= builder->len);
+    int len = end - start;
+    char *str = malloc(len+1);
+    memcpy(str,&builder->input[start],len);
+    str[len] = '\0';
+
+    return str;
+}
+
+static char *identifierString(Builder *builder, Term *term)
+{
+    assert(isIdent(term,"Identifier"));
+    Term *body = TermChildAt(term,0);
+    assert(isSequence(body,3));
+    return termChildrenString(builder,body,0,1);
+}
+
+// For Terms of type ChoiceExpr, determine which of the choices the corresponding term is
+
+static int choiceIndex(Term *body)
+{
+    assert(TermKind(body) == ChoiceExpr);
+    assert(TermCount(body) == 1);
+    Term *choice = TermChildAt(body,0);
+    int match = -1;
+    int choiceCount = ExprChoiceCount(TermType(body));
+    for (int i = 0; i < choiceCount; i++) {
+        if (TermType(choice) == ExpressionChildAt(TermType(body),i))
+            match = i;
+    }
+    return match;
+}
+
+// Expression building functions (build*)
+//
+// There is one of these for each type of expression that can be present in the grammar. The
+// supplied Term object is, in all cases, of an IdentExpr type, whose single child is the body
+// of the corresponding grammar rule.
+//
+// For example, buildPrimary is called with an ExpressionType of IdentExpr "Primary", and the
+// body is a ChoiceExpr which can contain one of five possible child types, as given in the
+// PEG grammar.
+//
+// To see the exact expression tree of a given rule, look at its definition in Builtin.c
+
+static Expression *buildIdentifier(Builder *builder, Term *term)
+{
+    char *str = identifierString(builder,term);
+    Expression *result = ExpressionNewIdent(str);
+    free(str);
+    return result;
+}
+
+static Expression *buildLiteral(Builder *builder, Term *term)
+{
+    // The Literal rule in the built-in PEG grammar contains two choices - one for single quotes
+    // and the other for double quotes - which otherwise have the same structure. There are four
+    // children, where child 0 and child 2 represent the quotes themselves, child 1 represents the
+    // escaped representation of a string, and child 3 is any trailing whitespace.
+    //
+    // All we're interested in is the unescaped string, so we get child 1 and convert any escape
+    // sequences (e.g. \" or \n) to the characters they represent, and return a new literal
+    // expression with the unescaped string as its value.
+
+    assert(isIdent(term,"Literal"));
+    Term *body = TermChildAt(term,0);
+    assert(TermKind(body) == ChoiceExpr);
+    int index = choiceIndex(body);
+    assert((index == 0) || (index == 1));
+    Term *choice = TermChildAt(body,0);
+    assert(isSequence(choice,4));
+    Term *content = TermChildAt(choice,1);
+    char *escaped = termString(builder,content);
+    char *unescaped = unescapeLiteral(escaped);
+    Expression *result = ExpressionNewLit(unescaped);
+    free(escaped);
+    free(unescaped);
+    return result;
+}
+
+static int decodeRangeChar(Builder *builder, Term *charTerm)
+{
+    // FIXME: Handle non-ASCII chars encoded as UTF-8, as well as numeric escape sequences
+    // We don't need to allocate a string for unescaped here, we should just do it directly
+
+    assert(isIdent(charTerm,"Char"));
+    char *escaped = termString(builder,charTerm);
+    char *unescaped = unescapeLiteral(escaped);
+    int result;
+    if (strlen(unescaped) == 0)
+        result = '?';
+    else if (unescaped[0] < 0) // Start of UTF-8 multibyte sequence
+        result = '?';
+    else
+        result = unescaped[0];
+    free(escaped);
+    free(unescaped);
+    return result;
+}
+
+static Expression *buildRange(Builder *builder, Term *term)
+{
+    // A range expression, one or more of which appear inside a [...] character class expression,
+    // matches a character in a range between a start and end value. We use a single expression
+    // type to represent both exact matches (where the minimum and maximum are the same), and
+    // "true" ranges (which match two or more possible characters).
+    //
+    // Note that the representation we use for Range expressions is a (start,end) pair, where a
+    // character match must satisfy the condition start <= c < end (that is, the range includes the
+    // start, but does *not* include the end). This representation is for the convenience of other
+    // code that works with ranges. The ExpressionNewRange function however takes the minimum and
+    // maximum values - that is, a matching character c must satisfy min <= c <= max.
+
+    assert(isIdent(term,"Range"));
+    Term *body = TermChildAt(term,0);
+    int index = choiceIndex(body);
+    Term *choice = TermChildAt(body,0);
+    assert((index == 0) || (index == 1));
+    if (index == 0) {
+        assert(isSequence(choice,3));
+        Term *minChar = TermChildAt(choice,0);
+        Term *maxChar = TermChildAt(choice,2);
+        assert(isIdent(minChar,"Char"));
+        assert(isIdent(maxChar,"Char"));
+        int min = decodeRangeChar(builder,minChar);
+        int max = decodeRangeChar(builder,maxChar);
+        return ExpressionNewRange(min,max);
+    }
+    else {
+        assert(isIdent(choice,"Char"));
+        int value = decodeRangeChar(builder,choice);
+        return ExpressionNewRange(value,value);
+    }
+}
+
+static Expression *buildClass(Builder *builder, Term *term)
+{
+    // A character class expression is the same as a choice expression, except that it is only
+    // supposed to contain range expressions.
+
+    assert(isIdent(term,"Class"));
+    Term *body = TermChildAt(term,0);
+    assert(isSequence(body,4));
+    Term *star = TermChildAt(body,1);
+    assert(TermKind(star) == StarExpr);
+
+    Expression *children[MAX_CHILDREN];
+    int count = 0;
+    for (TermList *item = TermChildren(star); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
+        assert(isSequence(item->term,2));
+        Term *rangeTerm = TermChildAt(item->term,1);
+        Expression *rangeExpr = buildRange(builder,rangeTerm);
+        children[count++] = rangeExpr;
+    }
+
+    return ExpressionNewClass(count,children);
+}
+
+static Expression *buildDot(Builder *builder, Term *term)
+{
+    assert(isIdent(term,"DOT"));
+    return ExpressionNewDot();
+}
+
+static Expression *buildPrimary(Builder *builder, Term *term)
+{
+    // A primary expression can be one of five possibilities: An identifier (reference to another
+    // rule in the grammar), a parenthesised expression, a literal, a character class, or a dot
+    // (which matches any character).
+    //
+    // The type of the term is an Expression with kind == ChoiceExpr, and the term has exactly
+    // one child. We use choiceIndex to determine which of the five possible expression types
+    // the choice matches, and then call through to the relevant function to build the appropriate
+    // type of Expression object.
+
+    assert(isIdent(term,"Primary"));
+    Term *body = TermChildAt(term,0);
+    assert(TermKind(body) == ChoiceExpr);
+    assert(TermCount(body) == 1);
+    Term *choice = TermChildAt(body,0);
+
+    switch (choiceIndex(body)) {
+        case 0: {
+            assert(isSequence(choice,2));
+            Term *identifier = TermChildAt(choice,0);
+            return buildIdentifier(builder,identifier);
+        }
+        case 1: {
+            assert(isSequence(choice,3));
+            Term *expression = TermChildAt(choice,1);
+            return buildExpression(builder,expression);
+        }
+        case 2:
+            return buildLiteral(builder,choice);
+        case 3:
+            return buildClass(builder,choice);
+        case 4:
+            return buildDot(builder,choice);
+        default:
+            assert(!"Invalid choice for Primary");
+            return NULL;
+    }
+}
+
+static Expression *buildSuffix(Builder *builder, Term *term)
+{
+    // A suffix expression has two children. The second is optional, and is one of either QUESTION,
+    // STAR, or PLUS - which indicate that the first child may occur zero or one times, zero or more
+    // times, or one or more times. The first child is always present, and represents a primary
+    // expression with no prefix or suffix.
+    //
+    // If a QUESTION, STAR, or PLUS suffix is given, we wrap the constructed primary expression
+    // inside another Expression object of type OptExpr, StarExpr, or PlusExpr. Otherwise, we just
+    // return the primary expression directly.
+
+    assert(isIdent(term,"Suffix"));
+    Term *body = TermChildAt(term,0);
+    assert(isSequence(body,2));
+    Term *primary = TermChildAt(body,0);
+    Term *suffix = TermChildAt(body,1);
+    assert(isIdent(primary,"Primary"));
+    assert(TermKind(suffix) == OptExpr);
+
+    Expression *primaryExpr = buildPrimary(builder,primary);
+
+    assert((TermCount(suffix) == 0) || (TermCount(suffix) == 1));
+    if (TermCount(suffix) == 1) {
+        Term *suffixChild = TermChildAt(suffix,0);
+        int index = choiceIndex(suffixChild);
+        Term *choice = TermChildAt(suffixChild,0);
+        switch (index) {
+            case 0:
+                assert(isIdent(choice,"QUESTION"));
+                return ExpressionNewOpt(primaryExpr);
+            case 1:
+                assert(isIdent(choice,"STAR"));
+                return ExpressionNewStar(primaryExpr);
+            case 2:
+                assert(isIdent(choice,"PLUS"));
+                return ExpressionNewPlus(primaryExpr);
+            default:
+                assert(!"Invalid choice for Suffix");
+                break;
+        }
+    }
+
+    return primaryExpr;
+}
+
+static Expression *buildPrefix(Builder *builder, Term *term)
+{
+    // A prefix expression has two children. The first is optional, and is one of either AND or
+    // NOT - which indicate a positive or negative lookahead assertion. The second is not optional,
+    // and represents a primary expression with an optional suffix.
+    //
+    // If an AND or NOT prefix is given, we wrap the constructed primary expression inside another
+    // Expression object of type AndExpr or NotExpr. Otherwise, we just return the primary
+    // expression directly.
+
+    assert(isIdent(term,"Prefix"));
+    Term *body = TermChildAt(term,0);
+    assert(isSequence(body,2));
+    Term *prefix = TermChildAt(body,0);
+    Term *suffix = TermChildAt(body,1);
+    assert(TermKind(prefix) == OptExpr);
+    assert(isIdent(suffix,"Suffix"));
+
+    Expression *suffixExpr = buildSuffix(builder,suffix);
+
+    assert((TermCount(prefix) == 0) || (TermCount(prefix) == 1));
+    if (TermCount(prefix) == 1) {
+        Term *prefixChild = TermChildAt(prefix,0);
+        int index = choiceIndex(prefixChild);
+        Term *choice = TermChildAt(prefixChild,0);
+        switch (index) {
+            case 0:
+                assert(isIdent(choice,"AND"));
+                return ExpressionNewAnd(suffixExpr);
+            case 1:
+                assert(isIdent(choice,"NOT"));
+                return ExpressionNewNot(suffixExpr);
+            default:
+                assert(!"Invalid choice for Prefix");
+                break;
+        }
+    }
+
+    return suffixExpr;
+}
+
+static Expression *buildSequence(Builder *builder, Term *term)
+{
+    // A Sequence consists of one or more expressions, each of which has an optional prefix
+    // and suffix
+    assert(isIdent(term,"Sequence"));
+    Term *body = TermChildAt(term,0);
+    assert(TermKind(body) == StarExpr);
+
+    Expression *children[MAX_CHILDREN];
+    int count = 0;
+    for (TermList *item = TermChildren(body); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
+        children[count] = buildPrefix(builder,item->term);
+        count++;
+    }
+    if (count == 1)
+        return children[0];
+    else
+        return ExpressionNewSequence(count,children);
+}
+
+static Expression *buildExpression(Builder *builder, Term *term)
+{
+    // An Expression consists of one or more choices
+    assert(isIdent(term,"Expression"));
+    Term *body = TermChildAt(term,0);
+    assert(isSequence(body,2));
+
+    Term *child0 = TermChildAt(body,0);
+    Term *child1 = TermChildAt(body,1);
+
+    assert(isIdent(child0,"Sequence"));
+    assert(TermKind(child1) == StarExpr);
+
+    Expression *initial = buildSequence(builder,child0);
+    if (TermCount(child1) == 0)
+        return initial;
+
+    Expression *children[MAX_CHILDREN];
+    children[0] = initial;
+    int count = 1;
+    for (TermList *item = TermChildren(child1); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
+        assert(isSequence(item->term,2));
+        children[count] = buildSequence(builder,TermChildAt(item->term,1));
+        count++;
+    }
+    return ExpressionNewChoice(count,children);
+}
+
+static void buildGrammar(Builder *builder, Term *term)
+{
+    assert(isSequence(term,3));
+    Term *plus = TermChildAt(term,1);
+    assert(TermKind(plus) == PlusExpr);
+
+    for (TermList *plusItem = plus->children; plusItem != NULL; plusItem = plusItem->next) {
+        Term *defIdent = plusItem->term;
+        assert(isIdent(defIdent,"Definition"));
+        Term *defSeq = TermChildAt(defIdent,0);
+        assert(isSequence(defSeq,3));
+        Term *identTerm = TermChildAt(defSeq,0);
+        Term *exprTerm = TermChildAt(defSeq,2);
+        assert(isIdent(identTerm,"Identifier"));
+        assert(isIdent(exprTerm,"Expression"));
+
+        char *ruleName = identifierString(builder,identTerm);
+        Expression *ruleExpr = buildExpression(builder,exprTerm);
+        GrammarDefine(builder->gram,ruleName,ruleExpr);
+        free(ruleName);
+    }
+}
+
+// This function creates a new Grammar object from the result of parsing a file using the built-in
+// PEG grammar. The resulting Grammar object can then be used to parse other files which are written
+// in another language that is accepted by that grammar. The Term objects are constructed by the
+// parse function defined in Parser.c.
+
+Grammar *grammarFromTerm(Term *term, const char *input)
+{
+    Grammar *gram = GrammarNew();
+
+    Builder *builder = (Builder *)calloc(1,sizeof(Builder));
+    builder->gram = gram;
+    builder->input = input;
+    builder->len = strlen(input);
+
+    buildGrammar(builder,term);
+
+    free(builder);
+
+    return gram;
+}

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/1707a712/experiments/flat/src/BuildGrammar.h
----------------------------------------------------------------------
diff --git a/experiments/flat/src/BuildGrammar.h b/experiments/flat/src/BuildGrammar.h
new file mode 100644
index 0000000..dcf4759
--- /dev/null
+++ b/experiments/flat/src/BuildGrammar.h
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "Grammar.h"
+#include "Term.h"
+
+Grammar *grammarFromTerm(Term *term, const char *input);

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/1707a712/experiments/flat/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/experiments/flat/src/CMakeLists.txt b/experiments/flat/src/CMakeLists.txt
index e7287d3..12ede19 100644
--- a/experiments/flat/src/CMakeLists.txt
+++ b/experiments/flat/src/CMakeLists.txt
@@ -23,6 +23,8 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 ## group source objects
 ###
 set(SOURCES
+    BuildGrammar.c
+    BuildGrammar.h
     Builtin.c
     Builtin.h
     Common.h

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/1707a712/experiments/flat/src/Term.c
----------------------------------------------------------------------
diff --git a/experiments/flat/src/Term.c b/experiments/flat/src/Term.c
index 359d539..f8b331f 100644
--- a/experiments/flat/src/Term.c
+++ b/experiments/flat/src/Term.c
@@ -34,6 +34,53 @@ Term *TermNew(Expression *type, int start, int end, TermList *children)
     return term;
 }
 
+ExprKind TermKind(Term *term)
+{
+    return ExpressionKind(TermType(term));
+}
+
+Expression *TermType(Term *term)
+{
+    return term->type;
+}
+
+int TermStart(Term *term)
+{
+    return term->start;
+}
+
+int TermEnd(Term *term)
+{
+    return term->end;
+}
+
+TermList *TermChildren(Term *term)
+{
+    return term->children;
+}
+
+int TermCount(Term *term)
+{
+    int count = 0;
+    for (TermList *item = term->children; item != NULL; item = item->next)
+        count++;
+    return count;
+}
+
+Term *TermChildAt(Term *term, int index)
+{
+    int count = 0;
+    for (TermList *item = term->children; item != NULL; item = item->next) {
+        if (count == index)
+            return item->term;
+        count++;
+    }
+    fprintf(stderr,"No child term of %s at index %d; max %d\n",ExprKindAsString(TermKind(term)),index,TermCount(term));
+    abort();
+    return NULL;
+}
+
+
 TermList *TermListNew(Term *term, TermList *next)
 {
     TermList *list = (TermList *)calloc(1,sizeof(TermList));

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/1707a712/experiments/flat/src/Term.h
----------------------------------------------------------------------
diff --git a/experiments/flat/src/Term.h b/experiments/flat/src/Term.h
index 865618e..b2a74a8 100644
--- a/experiments/flat/src/Term.h
+++ b/experiments/flat/src/Term.h
@@ -36,6 +36,14 @@ struct TermList {
 
 Term *TermNew(Expression *type, int start, int end, TermList *children);
 
+ExprKind TermKind(Term *term);
+Expression *TermType(Term *term);
+int TermStart(Term *term);
+int TermEnd(Term *term);
+TermList *TermChildren(Term *term);
+int TermCount(Term *term);
+Term *TermChildAt(Term *term, int index);
+
 TermList *TermListNew(Term *term, TermList *next);
 void TermListPtrAppend(TermList ***listPtr, Term *term);
 

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/1707a712/experiments/flat/src/flat.c
----------------------------------------------------------------------
diff --git a/experiments/flat/src/flat.c b/experiments/flat/src/flat.c
index 179bd9c..7ed03d3 100644
--- a/experiments/flat/src/flat.c
+++ b/experiments/flat/src/flat.c
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "Common.h"
+#include "BuildGrammar.h"
 #include "Builtin.h"
 #include "Parser.h"
 #include <stdio.h>
@@ -67,13 +68,47 @@ int main(int argc, const char **argv)
         free(input);
         GrammarFree(gram);
     }
+    else if ((argc == 3) && !strcmp(argv[1],"-b")) {
+        const char *filename = argv[2];
+        char *input = readStringFromFile(filename);
+        if (input == NULL) {
+            perror(filename);
+            exit(1);
+        }
+
+        Grammar *gram = GrammarNewBuiltin();
+        Term *term = parse(gram,"Grammar",input,0,strlen(input));
+        if (term == NULL) {
+            fprintf(stderr,"%s: Parse failed\n",filename);
+            exit(1);
+        }
+
+
+        Grammar *built = grammarFromTerm(term,input);
+        GrammarPrint(built);
+
+        free(input);
+        GrammarFree(gram);
+        GrammarFree(built);
+    }
     else {
         printf("Usage:\n"
                "\n"
-               "flat -g            Print built-in PEG grammar\n"
+               "flat -g\n"
                "\n"
-               "flat -p FILENAME   Parse FILENAME using the built-in PEG grammar, and print\n"
-               "                   the resulting parse tree\n");
+               "    Print the built-in PEG grammar\n"
+               "\n"
+               "flat -p FILENAME\n"
+               "\n"
+               "    Parse FILENAME using the built-in PEG grammar, and print out the resulting\n"
+               "    parse tree\n"
+               "\n"
+               "flat -b FILENAME\n"
+               "\n"
+               "    Parse FILENAME using the built-in PEG grammar, then use the resulting parse\n"
+               "    tree to build a Grammar object, and print out the constructed grammar.\n"
+               "\n");
+
         return 1;
     }
     return 0;


Mime
View raw message