jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r418444 - /jackrabbit/trunk/textfilters/src/java/org/apache/jackrabbit/core/query/XMLTextFilter.java
Date Sat, 01 Jul 2006 06:20:55 GMT
Author: jukka
Date: Fri Jun 30 23:20:55 2006
New Revision: 418444

URL: http://svn.apache.org/viewvc?rev=418444&view=rev
Log:
JCR-470: Include XML attribute values in index.

Modified:
    jackrabbit/trunk/textfilters/src/java/org/apache/jackrabbit/core/query/XMLTextFilter.java

Modified: jackrabbit/trunk/textfilters/src/java/org/apache/jackrabbit/core/query/XMLTextFilter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/textfilters/src/java/org/apache/jackrabbit/core/query/XMLTextFilter.java?rev=418444&r1=418443&r2=418444&view=diff
==============================================================================
--- jackrabbit/trunk/textfilters/src/java/org/apache/jackrabbit/core/query/XMLTextFilter.java	(original)
+++ jackrabbit/trunk/textfilters/src/java/org/apache/jackrabbit/core/query/XMLTextFilter.java	Fri Jun 30 23:20:55 2006
@@ -44,8 +44,6 @@
  */
 public class XMLTextFilter implements TextFilter {
 
-    private org.apache.jackrabbit.core.query.XMLTextFilter.XMLParser parser;
-
     /**
      * @return <code>true</code> for <code>text/xml</code>, <code>false</code>
      *         otherwise.
@@ -67,29 +65,38 @@
      */
     public Map doFilter(PropertyState data, String encoding)
             throws RepositoryException {
-
-        if (parser == null) {
-            initParser();
-        }
-
         InternalValue[] values = data.getValues();
         if (values.length > 0) {
             final BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
             LazyReader reader = new LazyReader() {
                 protected void initializeReader() throws IOException {
-                    InputStream in;
                     try {
-                        in = blob.getStream();
-                    } catch (RepositoryException e) {
+                        StringBuffer buffer = new StringBuffer();
+                        XMLParser parser = new XMLParser(buffer);
+
+                        SAXParserFactory saxParserFactory =
+                            SAXParserFactory.newInstance();
+                        saxParserFactory.setValidating(false);
+                        SAXParser saxParser = saxParserFactory.newSAXParser();
+                        XMLReader xmlReader = saxParser.getXMLReader();
+                        xmlReader.setContentHandler(parser);
+                        xmlReader.setErrorHandler(parser);
+
+                        InputStream in = blob.getStream();
+                        try {
+                            InputSource source = new InputSource(in);
+                            xmlReader.parse(source);
+                            delegate = new StringReader(buffer.toString());
+                        } finally {
+                            in.close();
+                        }
+                    } catch (SAXException se) {
+                        throw new IOException(se.getMessage());
+                    } catch (RepositoryException se) {
+                        throw new IOException(se.getMessage());
+                    } catch (ParserConfigurationException e) {
                         throw new IOException(e.getMessage());
                     }
-                    try {
-                        parser.parse(in);
-                        String text = parser.getContents();
-                        delegate = new StringReader(text);
-                    } finally {
-                        in.close();
-                    }
                 }
             };
 
@@ -103,131 +110,54 @@
     }
 
     /**
-     * Inits the parser engine
-     *
-     * @throws javax.jcr.RepositoryException If some error happens
-     */
-    private void initParser() throws javax.jcr.RepositoryException {
-
-        try {
-            SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
-            saxParserFactory.setValidating(false);
-            SAXParser saxParser = saxParserFactory.newSAXParser();
-            XMLReader xmlReader = saxParser.getXMLReader();
-            parser = new XMLParser(xmlReader);
-        } catch (ParserConfigurationException e) {
-            throw new javax.jcr.RepositoryException();
-        } catch (SAXException e) {
-            throw new javax.jcr.RepositoryException();
-        }
-    }
-
-    /**
-     * Private helper XML parser. It only processes text elements. Feel free to
-     * change for adding support for attributes or tags text extraction.
+     * Private helper XML parser. It only processes text elements and
+     * attributes. Feel free to change for adding support for tags text
+     * extraction.
      */
-    private class XMLParser extends DefaultHandler implements ErrorHandler {
-
-        private XMLReader xmlReader;
-        private StringBuffer buffer;
-
-        public XMLParser(XMLReader xmlReader) {
-
-            try {
-
-                this.xmlReader = xmlReader;
-                this.xmlReader.setContentHandler(this);
-                this.xmlReader.setErrorHandler(this);
-
-            } catch (Exception ex) {
-
-            }
-        }
-
-        public void startDocument() throws SAXException {
-
-            buffer = new StringBuffer();
-        }
+    private static class XMLParser extends DefaultHandler implements ErrorHandler {
 
-        public void startElement(String namespaceURI, String localName,
-                                 String rawName, Attributes atts)
-                throws SAXException {
-        }
-
-        public void characters(char[] ch,
-                               int start,
-                               int length) throws SAXException {
-
-
-            buffer.append(ch, start, length);
-        }
-
-        public void endElement(java.lang.String namespaceURI,
-                               java.lang.String localName,
-                               java.lang.String qName)
-                throws SAXException {
-        }
-
-
-        public void warning(SAXParseException spe) throws SAXException {
-
-
-        }
-
-        public void error(SAXParseException spe) throws SAXException {
+        private final StringBuffer buffer;
 
+        public XMLParser(StringBuffer buffer) {
+            this.buffer = buffer;
         }
 
-        public void fatalError(SAXParseException spe) throws SAXException {
-
-        }
-
-        public void parse(InputStream is) throws IOException {
-
-            try {
-                InputSource source = new InputSource(is);
-                xmlReader.parse(source);
-            } catch (SAXException se) {
-                throw new IOException(se.getMessage());
+        public void startElement(
+                String uri, String local, String name, Attributes attributes) {
+            for (int i = 0; i < attributes.getLength(); i++) {
+                // Add spaces to separate the attribute value from other content
+                String value = " " + attributes.getValue(i) + " ";
+                characters(value.toCharArray(), 0, value.length());
             }
         }
 
-        private String filterAndJoin(String text) {
-
+        public void characters(char[] ch, int start, int length) {
             boolean space = false;
-            StringBuffer buffer = new StringBuffer();
-            for (int i = 0; i < text.length(); i++) {
-                char c = text.charAt(i);
-
-                if ((c == '\n') || (c == ' ') || Character.isWhitespace(c)) {
+            for (int i = start; i < length; i++) {
+                if (Character.isLetterOrDigit(ch[i])) {
                     if (space) {
-                        continue;
-                    } else {
-                        space = true;
                         buffer.append(' ');
-                        continue;
+                        space = false;
                     }
+                    buffer.append(ch[i]);
                 } else {
-                    if (!Character.isLetter(c)) {
-                        if (!space) {
-                            space = true;
-                            buffer.append(' ');
-                            continue;
-                        }
-                        continue;
-                    }
+                    space = true;
                 }
-                space = false;
-                buffer.append(c);
             }
-            return buffer.toString();
+            if (space) {
+                buffer.append(' ');
+            }
+        }
+
+        public void warning(SAXParseException spe) {
         }
 
-        public String getContents() {
+        public void error(SAXParseException spe) {
+        }
 
-            String text = filterAndJoin(buffer.toString());
-            return text;
+        public void fatalError(SAXParseException spe) {
         }
 
     }
+
 }



Mime
View raw message