lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bugzi...@apache.org
Subject DO NOT REPLY [Bug 24370] New: - contributions XML indexing demo: SAX parsing updates to SAX2
Date Mon, 03 Nov 2003 22:44:49 GMT
DO NOT REPLY TO THIS EMAIL, BUT PLEASE POST YOUR BUG 
RELATED COMMENTS THROUGH THE WEB INTERFACE AVAILABLE AT
<http://nagoya.apache.org/bugzilla/show_bug.cgi?id=24370>.
ANY REPLY MADE TO THIS MESSAGE WILL NOT BE COLLECTED AND 
INSERTED IN THE BUG DATABASE.

http://nagoya.apache.org/bugzilla/show_bug.cgi?id=24370

contributions XML indexing demo: SAX parsing updates to SAX2

           Summary: contributions XML indexing demo: SAX parsing updates to
                    SAX2
           Product: Lucene
           Version: unspecified
          Platform: All
        OS/Version: All
            Status: NEW
          Severity: Enhancement
          Priority: Other
         Component: Other
        AssignedTo: lucene-dev@jakarta.apache.org
        ReportedBy: cdevarenne@yahoo.com


While using the SAX XML indexing demo in the contributions project, I updated 
the code for SAX2.  Here is the CVS diff for the code update.  Erik Hatcher 
mentioned he could submit this.  I hope the attachment is OK; if not, please 
contact me.  Thanks.



diff -u -r1.1 XMLDocumentHandlerSAX.java
--- XMLDocumentHandlerSAX.java	21 Jun 2002 15:02:51 -0000	1.1
+++ XMLDocumentHandlerSAX.java	3 Nov 2003 22:33:44 -0000
@@ -1,62 +1,96 @@
 package org.apache.lucenesandbox.xmlindexingdemo;
 
-import org.xml.sax.*;
-import org.xml.sax.helpers.*;
-import org.xml.sax.AttributeList;
-import javax.xml.parsers.*;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
 import java.io.File;
 import java.io.IOException;
 
-public class XMLDocumentHandlerSAX
-    extends HandlerBase
-{
-    /** A buffer for each XML element */
-    private StringBuffer elementBuffer = new StringBuffer();
-
-    private Document mDocument;
-
-    // constructor
-    public XMLDocumentHandlerSAX(File xmlFile)
-	throws ParserConfigurationException, SAXException, IOException
-    {
-	SAXParserFactory spf = SAXParserFactory.newInstance();
-
-	SAXParser parser = spf.newSAXParser();
-	parser.parse(xmlFile, this);
-    }
-
-    // call at document start
-    public void startDocument()
-    {
-	mDocument = new Document();
-    }
-
-    // call at element start
-    public void startElement(String localName, AttributeList atts)
-	throws SAXException
-    {
-        elementBuffer.setLength(0);
-    }
-
-    // call when cdata found
-    public void characters(char[] text, int start, int length)
-    {
-	elementBuffer.append(text, start, length);
-    }
-
-    // call at element end
-    public void endElement(String localName)
-	throws SAXException
-    {
-	    mDocument.add(Field.Text(localName, elementBuffer.toString()));
-    }
-
-    public Document getDocument()
-    {
-	return mDocument;
-    }
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class XMLDocumentHandlerSAX extends DefaultHandler {
+	/** A buffer for each XML element */
+	private StringBuffer elementBuffer = new StringBuffer();
+
+	private Document mDocument;
+
+	// constructor
+	public XMLDocumentHandlerSAX(File xmlFile)
+		throws ParserConfigurationException, SAXException, IOException {
+		SAXParserFactory spf = SAXParserFactory.newInstance();
+		// use validating parser
+		spf.setValidating(true);
+		// to make the parser namespace-aware, uncomment the next line
+		//spf.setNamespaceAware(true);
+
+
+		SAXParser parser = spf.newSAXParser();
+		parser.parse(xmlFile, this);
+	}
+
+	// call at document start
+	public void startDocument() throws SAXException {
+		mDocument = new Document();
+	}
+
+	// call at element start
+	public void startElement(
+		String namespaceURI,
+		String localName,
+		String qualifiedName,
+		Attributes attrs)
+		throws SAXException {
+		String eName = localName;
+		if ("".equals(eName)) {
+			eName = qualifiedName; // namespaceAware = false
+		}
+		// list the attribute(s)
+        if (attrs != null) {
+            for (int i = 0; i < attrs.getLength(); i++) {
+                String aName = attrs.getLocalName(i); // Attr name 
+                if ("".equals(aName)) { aName = attrs.getQName(i); }
+                // perform application specific action on attribute(s)
+                // for now just dump out attribute name and value
+                System.out.println("attr " + aName+"="+attrs.getValue(i));
+            }
+        }		
+		elementBuffer.setLength(0);
+	}
+
+	// call when cdata found
+	public void characters(char[] text, int start, int length)
+		throws SAXException {
+		elementBuffer.append(text, start, length);
+	}
+
+	// call at element end
+	public void endElement(
+		String namespaceURI,
+		String simpleName,
+		String qualifiedName) {
+		
+		String eName = simpleName;
+		if ("".equals(eName)) {
+			eName = qualifiedName; // namespaceAware = false	
+		}
+		/*
+		System.out.println(
+			"endElement eName: "
+				+ eName
+				+ "\teltBuff:  "
+				+ elementBuffer.toString());
+		*/			
+		mDocument.add(Field.Text(eName, elementBuffer.toString()));	
	
+
+		
+	}
+
+	public Document getDocument() {
+		return mDocument;
+	}
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message