abdera-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jmsn...@apache.org
Subject svn commit: r584310 - in /incubator/abdera/java/trunk: core/src/main/java/org/apache/abdera/parser/ core/src/main/java/org/apache/abdera/util/ parser/src/main/java/org/apache/abdera/parser/stax/
Date Fri, 12 Oct 2007 22:45:34 GMT
Author: jmsnell
Date: Fri Oct 12 15:45:28 2007
New Revision: 584310

URL: http://svn.apache.org/viewvc?rev=584310&view=rev
Log:
Provide a mechanism for dealing with commonly used (X)HTML entity references.  Entities can
be registered using ParserOptions.  All the default HTML entities are preregistered.

The one downside to this mechanism is that it changes the XML infoset of the parsed objects,
so that digital signatures computed over documents that contain the unresolved entities are
unverifiable.  To deal with this, there is a mechanism for turning off the replacement so
that the current behavior of throwing an error is preserved.

Modified:
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/parser/ParserOptions.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/AbstractParserOptions.java
    incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
    incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMParser.java

Modified: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/parser/ParserOptions.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/parser/ParserOptions.java?rev=584310&r1=584309&r2=584310&view=diff
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/parser/ParserOptions.java
(original)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/parser/ParserOptions.java
Fri Oct 12 15:45:28 2007
@@ -126,4 +126,12 @@
    * Ignored when parsing a Reader
    */
   void setCompressionCodecs(CompressionCodec... codecs);
+  
+  void registerEntity(String name, String value);
+  
+  String resolveEntity(String name);
+  
+  void setResolveEntities(boolean resolve);
+  
+  boolean getResolveEntities();
 }

Modified: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/AbstractParserOptions.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/AbstractParserOptions.java?rev=584310&r1=584309&r2=584310&view=diff
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/AbstractParserOptions.java
(original)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/AbstractParserOptions.java
Fri Oct 12 15:45:28 2007
@@ -17,6 +17,9 @@
 */
 package org.apache.abdera.util;
 
+import java.util.HashMap;
+import java.util.Map;
+
 import org.apache.abdera.factory.Factory;
 import org.apache.abdera.filter.ParseFilter;
 import org.apache.abdera.parser.ParserOptions;
@@ -36,10 +39,16 @@
   protected boolean filterreserved = false;
   protected char replacement = 0;
   protected CompressionCodec[] codecs = null;
+  protected boolean resolveentities = true;
+  protected Map<String,String> entities = new HashMap<String,String>();
 
   protected abstract void initFactory();
   protected abstract void checkFactory(Factory factory);
 
+  protected AbstractParserOptions() {
+    initDefaultEntities();
+  }
+  
   public Object clone() throws CloneNotSupportedException {
     AbstractParserOptions copy = (AbstractParserOptions) super.clone();
    
@@ -118,4 +127,274 @@
     this.codecs = codecs;
   }
   
+  public void registerEntity(String name, String value) {
+    entities.put(name, value);
+  }
+
+  private void initDefaultEntities() {
+    registerEntity("quot","\"");
+    registerEntity("amp","\u0026");
+    registerEntity("lt","\u003C");
+    registerEntity("gt","\u003E");
+    registerEntity("nbsp"," ");
+    registerEntity("iexcl","\u00A1");
+    registerEntity("cent","\u00A2");
+    registerEntity("pound","\u00A3");
+    registerEntity("curren","\u00A4");
+    registerEntity("yen","\u00A5");
+    registerEntity("brvbar","\u00A6");
+    registerEntity("sect","\u00A7");
+    registerEntity("uml","\u00A8");
+    registerEntity("copy","\u00A9");
+    registerEntity("ordf","\u00AA");
+    registerEntity("laquo","\u00AB");
+    registerEntity("not","\u00AC");
+    registerEntity("shy","\u00AD");
+    registerEntity("reg","\u00AE");
+    registerEntity("macr","\u00AF");
+    registerEntity("deg","\u00B0");
+    registerEntity("plusmn","\u00B1");
+    registerEntity("sup2","\u00B2");
+    registerEntity("sup3","\u00B3");
+    registerEntity("acute","\u00B4");
+    registerEntity("micro","\u00B5");
+    registerEntity("para","\u00B6");
+    registerEntity("middot","\u00B7");
+    registerEntity("cedil","\u00B8");
+    registerEntity("sup1","\u00B9");
+    registerEntity("ordm","\u00BA");
+    registerEntity("raquo","\u00BB");
+    registerEntity("frac14","\u00BC");
+    registerEntity("frac12","\u00BD");
+    registerEntity("frac34","\u00BE");
+    registerEntity("iquest","\u00BF");
+    registerEntity("agrave","\u00C0");
+    registerEntity("aacute","\u00C1");
+    registerEntity("acirc","\u00C2");
+    registerEntity("atilde","\u00C3");
+    registerEntity("auml","\u00C4");
+    registerEntity("aring","\u00C5");
+    registerEntity("aelig","\u00C6");
+    registerEntity("ccedil","\u00C7");
+    registerEntity("egrave","\u00C8");
+    registerEntity("eacute","\u00C9");
+    registerEntity("ecirc","\u00CA");
+    registerEntity("euml","\u00CB");
+    registerEntity("igrave","\u00CC");
+    registerEntity("iacute","\u00CD");
+    registerEntity("icirc","\u00CE");
+    registerEntity("iuml","\u00CF");
+    registerEntity("eth","\u00D0");
+    registerEntity("ntilde","\u00D1");
+    registerEntity("ograve","\u00D2");
+    registerEntity("oacute","\u00D3");
+    registerEntity("ocirc","\u00D4");
+    registerEntity("otilde","\u00D5");
+    registerEntity("ouml","\u00D6");
+    registerEntity("times","\u00D7");
+    registerEntity("oslash","\u00D8");
+    registerEntity("ugrave","\u00D9");
+    registerEntity("uacute","\u00DA");
+    registerEntity("ucirc","\u00DB");
+    registerEntity("uuml","\u00DC");
+    registerEntity("yacute","\u00DD");
+    registerEntity("thorn","\u00DE");
+    registerEntity("szlig","\u00DF");
+    registerEntity("agrave","\u00E0");
+    registerEntity("aacute","\u00E1");
+    registerEntity("acirc","\u00E2");
+    registerEntity("atilde","\u00E3");
+    registerEntity("auml","\u00E4");
+    registerEntity("aring","\u00E5");
+    registerEntity("aelig","\u00E6");
+    registerEntity("ccedil","\u00E7");
+    registerEntity("egrave","\u00E8");
+    registerEntity("eacute","\u00E9");
+    registerEntity("ecirc","\u00EA");
+    registerEntity("euml","\u00EB");
+    registerEntity("igrave","\u00EC");
+    registerEntity("iacute","\u00ED");
+    registerEntity("icirc","\u00EE");
+    registerEntity("iuml","\u00EF");
+    registerEntity("eth","\u00F0");
+    registerEntity("ntilde","\u00F1");
+    registerEntity("ograve","\u00F2");
+    registerEntity("oacute","\u00F3");
+    registerEntity("ocirc","\u00F4");
+    registerEntity("otilde","\u00F5");
+    registerEntity("ouml","\u00F6");
+    registerEntity("divide","\u00F7");
+    registerEntity("oslash","\u00F8");
+    registerEntity("ugrave","\u00F9");
+    registerEntity("uacute","\u00FA");
+    registerEntity("ucirc","\u00FB");
+    registerEntity("uuml","\u00FC");
+    registerEntity("yacute","\u00FD");
+    registerEntity("thorn","\u00FE");
+    registerEntity("yuml","\u00FF");
+    registerEntity("oelig","\u0152");
+    registerEntity("oelig","\u0153");
+    registerEntity("scaron","\u0160");
+    registerEntity("scaron","\u0161");
+    registerEntity("yuml","\u0178");
+    registerEntity("fnof","\u0192");
+    registerEntity("circ","\u02C6");
+    registerEntity("tilde","\u02DC");
+    registerEntity("alpha","\u0391");
+    registerEntity("beta","\u0392");
+    registerEntity("gamma","\u0393");
+    registerEntity("delta","\u0394");
+    registerEntity("epsilon","\u0395");
+    registerEntity("zeta","\u0396");
+    registerEntity("eta","\u0397");
+    registerEntity("theta","\u0398");
+    registerEntity("iota","\u0399");
+    registerEntity("kappa","\u039A");
+    registerEntity("lambda","\u039B");
+    registerEntity("mu","\u039C");
+    registerEntity("nu","\u039D");
+    registerEntity("xi","\u039E");
+    registerEntity("omicron","\u039F");
+    registerEntity("pi","\u03A0");
+    registerEntity("rho","\u03A1");
+    registerEntity("sigma","\u03A3");
+    registerEntity("tau","\u03A4");
+    registerEntity("upsilon","\u03A5");
+    registerEntity("phi","\u03A6");
+    registerEntity("chi","\u03A7");
+    registerEntity("psi","\u03A8");
+    registerEntity("omega","\u03A9");
+    registerEntity("alpha","\u03B1");
+    registerEntity("beta","\u03B2");
+    registerEntity("gamma","\u03B3");
+    registerEntity("delta","\u03B4");
+    registerEntity("epsilon","\u03B5");
+    registerEntity("zeta","\u03B6");
+    registerEntity("eta","\u03B7");
+    registerEntity("theta","\u03B8");
+    registerEntity("iota","\u03B9");
+    registerEntity("kappa","\u03BA");
+    registerEntity("lambda","\u03BB");
+    registerEntity("mu","\u03BC");
+    registerEntity("nu","\u03BD");
+    registerEntity("xi","\u03BE");
+    registerEntity("omicron","\u03BF");
+    registerEntity("pi","\u03C0");
+    registerEntity("rho","\u03C1");
+    registerEntity("sigmaf","\u03C2");
+    registerEntity("sigma","\u03C3");
+    registerEntity("tau","\u03C4");
+    registerEntity("upsilon","\u03C5");
+    registerEntity("phi","\u03C6");
+    registerEntity("chi","\u03C7");
+    registerEntity("psi","\u03C8");
+    registerEntity("omega","\u03C9");
+    registerEntity("thetasym","\u03D1");
+    registerEntity("upsih","\u03D2");
+    registerEntity("piv","\u03D6");
+    registerEntity("ensp","\u2002");
+    registerEntity("emsp","\u2003");
+    registerEntity("thinsp","\u2009");
+    registerEntity("zwnj","\u200C");
+    registerEntity("zwj","\u200D");
+    registerEntity("lrm","\u200E");
+    registerEntity("rlm","\u200F");
+    registerEntity("ndash","\u2013");
+    registerEntity("mdash","\u2014");
+    registerEntity("lsquo","\u2018");
+    registerEntity("rsquo","\u2019");
+    registerEntity("sbquo","\u201A");
+    registerEntity("ldquo","\u201C");
+    registerEntity("rdquo","\u201D");
+    registerEntity("bdquo","\u201E");
+    registerEntity("dagger","\u2020");
+    registerEntity("dagger","\u2021");
+    registerEntity("bull","\u2022");
+    registerEntity("hellip","\u2026");
+    registerEntity("permil","\u2030");
+    registerEntity("prime","\u2032");
+    registerEntity("prime","\u2033");
+    registerEntity("lsaquo","\u2039");
+    registerEntity("rsaquo","\u203A");
+    registerEntity("oline","\u203E");
+    registerEntity("frasl","\u2044");
+    registerEntity("euro","\u20AC");
+    registerEntity("image","\u2111");
+    registerEntity("weierp","\u2118");
+    registerEntity("real","\u211C");
+    registerEntity("trade","\u2122");
+    registerEntity("alefsym","\u2135");
+    registerEntity("larr","\u2190");
+    registerEntity("uarr","\u2191");
+    registerEntity("rarr","\u2192");
+    registerEntity("darr","\u2193");
+    registerEntity("harr","\u2194");
+    registerEntity("crarr","\u21B5");
+    registerEntity("larr","\u21D0");
+    registerEntity("uarr","\u21D1");
+    registerEntity("rarr","\u21D2");
+    registerEntity("darr","\u21D3");
+    registerEntity("harr","\u21D4");
+    registerEntity("forall","\u2200");
+    registerEntity("part","\u2202");
+    registerEntity("exist","\u2203");
+    registerEntity("empty","\u2205");
+    registerEntity("nabla","\u2207");
+    registerEntity("isin","\u2208");
+    registerEntity("notin","\u2209");
+    registerEntity("ni","\u220B");
+    registerEntity("prod","\u220F");
+    registerEntity("sum","\u2211");
+    registerEntity("minus","\u2212");
+    registerEntity("lowast","\u2217");
+    registerEntity("radic","\u221A");
+    registerEntity("prop","\u221D");
+    registerEntity("infin","\u221E");
+    registerEntity("ang","\u2220");
+    registerEntity("and","\u2227");
+    registerEntity("or","\u2228");
+    registerEntity("cap","\u2229");
+    registerEntity("cup","\u222A");
+    registerEntity("int","\u222B");
+    registerEntity("there4","\u2234");
+    registerEntity("sim","\u223C");
+    registerEntity("cong","\u2245");
+    registerEntity("asymp","\u2248");
+    registerEntity("ne","\u2260");
+    registerEntity("equiv","\u2261");
+    registerEntity("le","\u2264");
+    registerEntity("ge","\u2265");
+    registerEntity("sub","\u2282");
+    registerEntity("sup","\u2283");
+    registerEntity("nsub","\u2284");
+    registerEntity("sube","\u2286");
+    registerEntity("supe","\u2287");
+    registerEntity("oplus","\u2295");
+    registerEntity("otimes","\u2297");
+    registerEntity("perp","\u22A5");
+    registerEntity("sdot","\u22C5");
+    registerEntity("lceil","\u2308");
+    registerEntity("rceil","\u2309");
+    registerEntity("lfloor","\u230A");
+    registerEntity("rfloor","\u230B");
+    registerEntity("lang","\u2329");
+    registerEntity("rang","\u232A");
+    registerEntity("loz","\u25CA");
+    registerEntity("spades","\u2660");
+    registerEntity("clubs","\u2663");
+    registerEntity("hearts","\u2665");
+    registerEntity("diams","\u2666");
+  }
+  
+  public String resolveEntity(String name) {
+    return resolveentities ? entities.get(name.toLowerCase()) : null;
+  }
+  
+  public void setResolveEntities(boolean resolve) {
+    this.resolveentities = resolve;
+  }
+  
+  public boolean getResolveEntities() {
+    return this.resolveentities;
+  }
 }

Modified: incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java?rev=584310&r1=584309&r2=584310&view=diff
==============================================================================
--- incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
(original)
+++ incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
Fri Oct 12 15:45:28 2007
@@ -239,7 +239,11 @@
                 if (!ignorePI) createPI();
                 break;
             case XMLStreamConstants.ENTITY_REFERENCE:
-                lastNode = createOMText(XMLStreamConstants.ENTITY_REFERENCE);
+                String val = parserOptions.resolveEntity(super.getName());
+                if (val == null)
+                  throw new ParseException("Unresolved undeclared entity: " + super.getName());
+                else 
+                  lastNode = createOMText(val,XMLStreamConstants.CHARACTERS);
                 break;
             default :
                 throw new ParseException();

Modified: incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMParser.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMParser.java?rev=584310&r1=584309&r2=584310&view=diff
==============================================================================
--- incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMParser.java
(original)
+++ incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMParser.java
Fri Oct 12 15:45:28 2007
@@ -20,10 +20,13 @@
 import java.io.InputStream;
 import java.io.Reader;
 
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
 
 import org.apache.abdera.Abdera;
 import org.apache.abdera.factory.Factory;
+import org.apache.abdera.i18n.iri.IRI;
 import org.apache.abdera.model.Document;
 import org.apache.abdera.model.Element;
 import org.apache.abdera.parser.ParseException;
@@ -34,7 +37,6 @@
 import org.apache.abdera.util.AbstractParser;
 import org.apache.abdera.util.CompressionUtil;
 import org.apache.abdera.util.Messages;
-import org.apache.abdera.i18n.iri.IRI;
 import org.apache.axiom.om.OMDocument;
 import org.apache.axiom.om.util.StAXUtils;
 
@@ -139,12 +141,24 @@
         in = new FOMXmlRestrictedCharReader(
           in,options.getFilterRestrictedCharacterReplacement());
       }
-      return parse(StAXUtils.createXMLStreamReader(in), base, options);
+      //return parse(StAXUtils.createXMLStreamReader(in), base, options);
+      return parse(createXMLStreamReader(in), base, options);
     } catch (Exception e) {
       if (!(e instanceof ParseException))
         e = new ParseException(e);
       throw (ParseException)e;
     }
+  }
+  
+  private XMLStreamReader createXMLStreamReader(Reader in) throws XMLStreamException {
+    javax.xml.stream.XMLInputFactory inputFactory = StAXUtils.getXMLInputFactory();
+    try {
+      inputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE);
+      XMLStreamReader reader = inputFactory.createXMLStreamReader(in);
+      return reader;
+    } finally {
+        StAXUtils.releaseXMLInputFactory(inputFactory);
+    }    
   }
   
   private <T extends Element> Document<T> parse(



Mime
View raw message