cocoon-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Bertrand Delacretaz <bdelacre...@codeconsult.ch>
Subject Re: [Ann] chaperon project launched at SF(was: textparser)
Date Mon, 25 Feb 2002 22:42:17 GMT
On Saturday 16 February 2002 17:39, Stephan Michels wrote:
>. . .
> Chaperon is a LALR(1) parser, which parse structured text documents
> and generate XML documents as output. 
>. . .

If anyone wants to play with Chaperon, here's a small command-line 
driver that I wrote for this parser. It allows Chaperon to run in a 
minimal environment.

Tested a few minutes ago by compiling it with the current CVS of 
chaperon (http://sourceforge.net/projects/chaperon).

Stephan, feel free to include this code in your project if you like it!

-Bertrand

-------- CODE STARTS HERE ------------------------
/*
 *  Simple command-line driver for the chaperon parser.
 *  See http://www.sourceforge.net/projects/chaperon
 *  Copyright (C) Bertrand Delacretaz, www.codeconsult.ch. 
 *  All rights reserved.
 *  -------------------------------------------------------------------
 *  This software is published under the terms of the Apache Software 
 *  License version 1.1, a copy of which has been included  with this 
 *  distribution in the LICENSE file.
 */

package net.sourceforge.chaperon.cmdline;

import java.io.OutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.io.OutputStreamWriter;

import org.apache.xml.serialize.Method;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;

import org.apache.xerces.parsers.SAXParser;

import org.xml.sax.SAXParseException;
import org.xml.sax.InputSource;

import net.sourceforge.chaperon.grammar.Grammar;
import net.sourceforge.chaperon.grammar.SyntaxErrorException;
import net.sourceforge.chaperon.grammar.generator.SAXGrammarGenerator;
import net.sourceforge.chaperon.parser.generator.ParserTableGenerator;
import net.sourceforge.chaperon.parser.ParserTable;
import net.sourceforge.chaperon.parser.Parser;
import net.sourceforge.chaperon.parser.CompressedDocument;

/**
 * Simple command-line driver for the chaperon parser
 *
 * @author Bertrand Delacretaz bdelacretaz@codeconsult.ch
 * @version $Revision$
 */
public class CmdLineParser {
    
    /** Parse grammarFile and use its grammar to parse inputFile.
     *  Write the result to os.
     *  Does not store the compiled grammar, recompiles it every time.
     */
    CmdLineParser(File grammarFile, File inputFile, OutputStream os)
    throws Exception {
        final ParserTable pt = parseGrammar(grammarFile);
        final CompressedDocument cd = parseInput(pt,inputFile,os);
        dumpDocument(cd,os);
    }
    
    /** parse supplied grammarFile */
    private ParserTable parseGrammar(File grammarFile)
    throws Exception {
        final SAXParser parser = new SAXParser();
        final SAXGrammarGenerator gg = new SAXGrammarGenerator();
        parser.setContentHandler(gg);
        
        info("parsing grammar file " + grammarFile.getName() + "...");
        parser.parse(grammarFile.getAbsolutePath());
        
        info("building parser table...");
        final Grammar g = gg.getGrammar();
        if(g == null) {
            throw new Exception("no Grammar was generated while parsing 
grammar file");
        }
        final ParserTableGenerator ptg = new ParserTableGenerator(g);
        return ptg.getParserTable();
    }
    
    /** parse supplied inputFile using supplied ParserTable (compiled 
grammar) */
    private CompressedDocument parseInput(ParserTable pt,File 
inputFile,OutputStream os)
    throws Exception {
        info("parsing input file " + inputFile.getName() + "...");
        final Parser p = new Parser();
        final InputSource is = new InputSource(new 
FileInputStream(inputFile));
        return p.parse(pt,is);
    }

    /** dump supplied CompressedDocument to supplied OutputStream */
    private void dumpDocument(CompressedDocument cd,OutputStream os)
    throws Exception {
        info("dumping parsed XML document...");
        final String encoding = "iso-8859-1";
        final OutputFormat format = new 
OutputFormat(Method.XML,encoding,true);
        format.setIndenting(true);
        format.setIndent(1);
        
        final OutputStreamWriter osw = new OutputStreamWriter(os);
        final XMLSerializer xmls = new XMLSerializer(osw,format);
        
        cd.toSAX(xmls.asContentHandler(), null);
        os.flush();
    }

    /** trivial logging mechanism */
    protected static void warn(String msg) {
        System.err.println("Chaperon CmdLineParser WARNING : " + msg);
    }
    
    /** trivial info mechanism */
    protected static void info(String msg) {
        System.err.println("Chaperon CmdLineParser: " + msg);
    }
    
    /** Entry point to parser from the command-line.
     *  Compiles the given grammar file and runs the parser on given 
input file.
     *  Output goes to stdout unless an output filename is specified
     */
    public static void main(String args[])
    throws Exception {
        if(args.length < 2) {
            warn("usage: CmdLineParser <grammarFile> <inputFile> 
[outputFile]");
            System.exit(1);
        }
        
        final String grammarFile = args[0];
        final String inputFile = args[1];
        final String outputFile = args.length > 2 ? args[2] : null;
        
        info("using grammar file: " + grammarFile);
        info("using input file: " + grammarFile);
        info("using output file: " + (outputFile == null ? "stdout" : 
outputFile));
        
        OutputStream os = System.out;
        if(outputFile != null) {
            os = new FileOutputStream(outputFile);
        }
        
        new CmdLineParser(new File(grammarFile),new File(inputFile),os);
        info("all done.");
    }
}
--- CODE ENDS HERE - no kidding, you read it entirely? ----------------







---------------------------------------------------------------------
To unsubscribe, e-mail: cocoon-dev-unsubscribe@xml.apache.org
For additional commands, email: cocoon-dev-help@xml.apache.org


Mime
View raw message