abdera-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jmsn...@apache.org
Subject svn commit: r611972 - in /incubator/abdera/java/trunk: build/ dependencies/ dependencies/legal/ extensions/html/ extensions/html/src/ extensions/html/src/main/ extensions/html/src/main/java/ extensions/html/src/main/java/org/ extensions/html/src/main/j...
Date Mon, 14 Jan 2008 23:50:45 GMT
Author: jmsnell
Date: Mon Jan 14 15:50:31 2008
New Revision: 611972

URL: http://svn.apache.org/viewvc?rev=611972&view=rev
Log:
Add the new html module. This adds the ability to parse html using Abdera and adds rudimentary link discovery
(for discovering service documents and feeds)

Added:
    incubator/abdera/java/trunk/dependencies/legal/htmlparser.LICENSE.txt
    incubator/abdera/java/trunk/dependencies/legal/serializer.LICENSE.txt
    incubator/abdera/java/trunk/dependencies/legal/serializer.NOTICE.txt
    incubator/abdera/java/trunk/extensions/html/
    incubator/abdera/java/trunk/extensions/html/pom.xml
    incubator/abdera/java/trunk/extensions/html/src/
    incubator/abdera/java/trunk/extensions/html/src/Test.java
    incubator/abdera/java/trunk/extensions/html/src/main/
    incubator/abdera/java/trunk/extensions/html/src/main/java/
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlCleaner.java
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlHelper.java
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParser.java
    incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParserOptions.java
    incubator/abdera/java/trunk/extensions/html/src/main/resources/
    incubator/abdera/java/trunk/extensions/html/src/main/resources/META-INF/
    incubator/abdera/java/trunk/extensions/html/src/main/resources/META-INF/services/
    incubator/abdera/java/trunk/extensions/html/src/main/resources/META-INF/services/org.apache.abdera.parser.NamedParser
    incubator/abdera/java/trunk/extensions/html/src/test/
    incubator/abdera/java/trunk/extensions/html/src/test/java/
    incubator/abdera/java/trunk/extensions/html/src/test/resources/
Modified:
    incubator/abdera/java/trunk/build/build.xml
    incubator/abdera/java/trunk/dependencies/deps.properties

Modified: incubator/abdera/java/trunk/build/build.xml
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/build/build.xml?rev=611972&r1=611971&r2=611972&view=diff
==============================================================================
--- incubator/abdera/java/trunk/build/build.xml (original)
+++ incubator/abdera/java/trunk/build/build.xml Mon Jan 14 15:50:31 2008
@@ -187,6 +187,8 @@
           <available file="${dependencies}/${wstx.jar}" />
           <available file="${dependencies}/${jetty.jar}" />
           <available file="${dependencies}/${jetty-util.jar}" />
+          <available file="${dependencies}/${htmlparser.jar}" />
+          <available file="${dependencies}/${htmlserializer.jar}" />
           <available file="${dependencies}/${json.zip}" />
         </and>
       </not>
@@ -415,6 +417,7 @@
   </target>
 
   <target name="clean">
+    <!--
     <delete dir="${work}" />
     <delete dir="${test}" />
     <delete dir="${dist}" />
@@ -425,6 +428,7 @@
       <fileset dir="${basedir}" includes="*.zip" />
       <fileset dir="${basedir}" includes="*.md5" />
     </delete>
+    -->
   </target>
 
   <target name="dist" depends="clean,build,test,docs">
@@ -573,6 +577,8 @@
       <get src="${wstx.dir}/${wstx.jar}" dest="${dependencies}/${wstx.jar}" usetimestamp="true" />
       <get src="${jetty.dir}/${jetty.jar}" dest="${dependencies}/${jetty.jar}" usetimestamp="true" />
       <get src="${jetty-util.dir}/${jetty-util.jar}" dest="${dependencies}/${jetty-util.jar}" usetimestamp="true" />
+      <get src="${htmlparser.dir}/${htmlparser.jar}" dest="${dependencies}/${htmlparser.jar}" usetimestamp="true" />
+      <get src="${htmlparser.dir}/${htmlserializer.jar}" dest="${dependencies}/${htmlserializer.jar}" usetimestamp="true" />
       <get src="${json.dir}/${json.zip}" dest="${dependencies}/${json.zip}" usetimestamp="true" />
     </parallel>
     <mkdir dir="${json.src}/main/java/org/json" />
@@ -711,6 +717,10 @@
       <param name="ext" value="oauth" />
     </antcall>
     
+    <antcall target="compile.extension">
+      <param name="ext" value="html" />
+    </antcall>
+    
   </target>
   
   <target name="compile.extension.ask">
@@ -790,6 +800,10 @@
       <param name="ext" value="oauth" />
     </antcall>
     
+    <antcall target="dist.extension">
+      <param name="ext" value="html" />
+    </antcall>
+    
   </target>
   
   <target name="dist.extension">
@@ -844,6 +858,10 @@
       <param name="ext" value="oauth" />
     </antcall>
     
+    <antcall target="test.extension">
+      <param name="ext" value="html" />
+    </antcall>
+    
   </target>
   
   <target name="test.extension">
@@ -890,6 +908,10 @@
 
     <antcall target="retro.extension">
       <param name="ext" value="oauth" />
+    </antcall>
+    
+    <antcall target="retro.extension">
+      <param name="ext" value="html" />
     </antcall>
   </target>
   

Modified: incubator/abdera/java/trunk/dependencies/deps.properties
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/deps.properties?rev=611972&r1=611971&r2=611972&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/deps.properties (original)
+++ incubator/abdera/java/trunk/dependencies/deps.properties Mon Jan 14 15:50:31 2008
@@ -1,6 +1,3 @@
-geronimo.activation.dir=http://www.apache.org/dist/java-repository/org.apache.geronimo.specs/jars
-geronimo.activation.jar=geronimo-activation_1.0.2_spec-1.1.jar
-
 axiom.dir=http://people.apache.org/~jmsnell
 axiom.api.jar=axiom-api-1.2.5.jar
 axiom.impl.jar=axiom-impl-1.2.5.jar
@@ -26,34 +23,46 @@
 commons.httpclient.dir=http://www.apache.org/dist/java-repository/commons-httpclient/jars
 commons.httpclient.jar=commons-httpclient-3.1.jar
 
+#### Required for build ####
 junit.dir=http://www.ibiblio.org/maven/junit/jars
 junit.jar=junit-4.3.jar
 
+#### Required for build ####
+geronimo.activation.dir=http://www.apache.org/dist/java-repository/org.apache.geronimo.specs/jars
+geronimo.activation.jar=geronimo-activation_1.0.2_spec-1.1.jar
 geronimo.servlet.dir=http://www.apache.org/dist/java-repository/org.apache.geronimo.specs/jars
 geronimo.servlet.jar=geronimo-servlet_2.4_spec-1.0.jar
 
+#### Required for build and JDK 1.4.2 support ####
 retroweaver.version=2.0
 retroweaver.dir=http://people.apache.org/~jmsnell
 retroweaver.zip=retroweaver-2.0.zip
 
+#### Optional for Security support ####
 xmlsecurity.dir=http://people.apache.org/~jmsnell
 xmlsecurity.jar=xmlsec-1.4.1.jar
-
 bouncycastle.dir=http://www.bouncycastle.org/download
 bouncycastle.jar=bcprov-jdk15-137.jar
 bouncycastle.jar.retro=bcprov-jdk14-137.jar
 bouncycastle.provider=org.bouncycastle.jce.provider.BouncyCastleProvider
 
+#### Needed only for build and testing ####
 jetty.dir=http://people.apache.org/~jmsnell
 jetty.jar=jetty-6.1.3.jar
-
 jetty-util.dir=http://people.apache.org/~jmsnell
 jetty-util.jar=jetty-util-6.1.3.jar
 
+#### Deprecated ####
 json.dir=http://json.org/java
 json.zip=apache.zip
 
-
+#### Optional for Spring integration ####
 spring.dir=http://downloads.sourceforge.net/springframework
 spring.zip=spring-framework-2.0.6.zip
-spring.name=spring-framework-2.0.6
\ No newline at end of file
+spring.name=spring-framework-2.0.6
+
+#### Optional for HTML parsing support ####
+htmlparser.zip=http://about.validator.nu/htmlparser/htmlparser-1.0.5.zip
+htmlparser.dir=http://people.apache.org/~jmsnell
+htmlparser.jar=htmlparser-1.0.5.jar
+htmlserializer.jar=serializer.jar
\ No newline at end of file

Added: incubator/abdera/java/trunk/dependencies/legal/htmlparser.LICENSE.txt
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/legal/htmlparser.LICENSE.txt?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/legal/htmlparser.LICENSE.txt (added)
+++ incubator/abdera/java/trunk/dependencies/legal/htmlparser.LICENSE.txt Mon Jan 14 15:50:31 2008
@@ -0,0 +1,26 @@
+Info:
+http://about.validator.nu/htmlparser/
+
+License:
+
+/*
+ * Copyright (c) 2007 Henri Sivonen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a 
+ * copy of this software and associated documentation files (the "Software"), 
+ * to deal in the Software without restriction, including without limitation 
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+ * and/or sell copies of the Software, and to permit persons to whom the 
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in 
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+ * DEALINGS IN THE SOFTWARE.
+ */

Added: incubator/abdera/java/trunk/dependencies/legal/serializer.LICENSE.txt
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/legal/serializer.LICENSE.txt?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/legal/serializer.LICENSE.txt (added)
+++ incubator/abdera/java/trunk/dependencies/legal/serializer.LICENSE.txt Mon Jan 14 15:50:31 2008
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Added: incubator/abdera/java/trunk/dependencies/legal/serializer.NOTICE.txt
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/legal/serializer.NOTICE.txt?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/legal/serializer.NOTICE.txt (added)
+++ incubator/abdera/java/trunk/dependencies/legal/serializer.NOTICE.txt Mon Jan 14 15:50:31 2008
@@ -0,0 +1,18 @@
+   =========================================================================
+   ==  NOTICE file corresponding to section 4(d) of the Apache License,   ==
+   ==  Version 2.0, in this case for the Apache Xalan Java distribution.  ==
+   =========================================================================
+
+   Apache Xalan (Xalan serializer)
+   Copyright 1999-2006 The Apache Software Foundation
+
+   This product includes software developed at
+   The Apache Software Foundation (http://www.apache.org/).
+
+   Portions of this software was originally based on the following:
+     - software copyright (c) 1999-2002, Lotus Development Corporation.,
+       http://www.lotus.com.
+     - software copyright (c) 2001-2002, Sun Microsystems.,
+       http://www.sun.com.
+     - software copyright (c) 2003, IBM Corporation., 
+       http://www.ibm.com.

Added: incubator/abdera/java/trunk/extensions/html/pom.xml
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/html/pom.xml?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/html/pom.xml (added)
+++ incubator/abdera/java/trunk/extensions/html/pom.xml Mon Jan 14 15:50:31 2008
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  The ASF licenses this file to You
+  under the Apache License, Version 2.0 (the "License"); you may not
+  use this file except in compliance with the License.
+  You may obtain a copy of the License at
+ 
+      http://www.apache.org/licenses/LICENSE-2.0
+ 
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.  For additional information regarding
+  copyright in this work, please see the NOTICE file in the top level
+  directory of this distribution. -->
+<project 
+  xmlns="http://maven.apache.org/POM/4.0.0" 
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <parent>
+    <groupId>org.apache.abdera</groupId>
+    <artifactId>abdera</artifactId>
+    <version>0.4.0-incubating-SNAPSHOT</version>
+  </parent>  
+  <modelVersion>4.0.0</modelVersion>
+  <artifactId>abdera-extensions-html</artifactId>
+  <packaging>jar</packaging>
+  <name>Abdera Extensions - HTML</name>
+  <version>0.4.0-incubating-SNAPSHOT</version>
+  <description>Atom Specification Extensions - HTML</description>
+  <inceptionYear>2006</inceptionYear>
+  <url>http://incubator.apache.org/abdera</url>
+  <scm>
+    <connection>scm:svn:http://svn.apache.org/repos/asf/incubator/abdera/java/trunk/extensions/html</connection>
+    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/incubator/abdera/java/trunk/extensions/html</developerConnection>
+    <url>http://svn.apache.org/repos/asf/incubator/abdera/java/trunk/extensions/html</url>
+  </scm>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.abdera</groupId>
+      <artifactId>abdera-core</artifactId>
+      <version>0.4.0-incubating-SNAPSHOT</version>
+      <scope>compile</scope>
+    </dependency>    
+    <dependency>
+      <groupId>org.apache.abdera</groupId>
+      <artifactId>abdera-parser</artifactId>
+      <version>0.4.0-incubating-SNAPSHOT</version>
+      <scope>compile</scope>
+    </dependency>   
+    <dependency>
+      <groupId>org.apache.abdera</groupId>
+      <artifactId>abdera-protocol</artifactId>
+      <version>0.4.0-incubating-SNAPSHOT</version>
+      <scope>compile</scope>
+    </dependency>  
+    <dependency>
+      <groupId>org.apache.abdera</groupId>
+      <artifactId>abdera-client</artifactId>
+      <version>0.4.0-incubating-SNAPSHOT</version>
+      <scope>compile</scope>
+    </dependency>  
+    <dependency>
+      <groupId>org.apache.ws.commons.axiom</groupId>
+      <artifactId>axiom-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.ws.commons.axiom</groupId>
+      <artifactId>axiom-impl</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>stax</groupId>
+      <artifactId>stax-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.codehaus.woodstox</groupId>
+      <artifactId>wstx-asl</artifactId>
+    </dependency> 
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>nu.validator.htmlparser</groupId>  
+      <artifactId>htmlparser</artifactId>
+    </dependency>
+  </dependencies>
+</project>

Added: incubator/abdera/java/trunk/extensions/html/src/Test.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/html/src/Test.java?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/html/src/Test.java (added)
+++ incubator/abdera/java/trunk/extensions/html/src/Test.java Mon Jan 14 15:50:31 2008
@@ -0,0 +1,36 @@
+import org.apache.abdera.Abdera;
+import org.apache.abdera.ext.html.HtmlCleaner;
+import org.apache.abdera.model.Entry;
+
+//import org.apache.abdera.ext.html.HtmlHelper;
+//import org.apache.abdera.model.Element;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.  For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+
+/**
+ * Ad-hoc command-line check for the html extension: cleans a small,
+ * unclosed HTML snippet with HtmlCleaner, stores the result as the XHTML
+ * content of a new Atom entry and prints that entry to standard output.
+ */
+public class Test {
+
+  public static void main(String... args) throws Exception {
+    // Create the entry first, then clean the markup, matching the original call order.
+    final Entry atomEntry = Abdera.getInstance().newEntry();
+    final String cleanedMarkup = HtmlCleaner.parse("<p>test<br>foo");
+    atomEntry.setContentAsXhtml(cleanedMarkup);
+    System.out.println(atomEntry);
+  }
+
+}

Added: incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlCleaner.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlCleaner.java?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlCleaner.java (added)
+++ incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlCleaner.java Mon Jan 14 15:50:31 2008
@@ -0,0 +1,111 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.ext.html;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.Writer;
+import java.util.Arrays;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlSerializer;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Utility that runs HTML markup through the validator.nu HTML parser and
+ * returns the re-serialized result as a String. The class is not
+ * instantiable; every entry point is static.
+ */
+public class HtmlCleaner {
+
+  private HtmlCleaner() {}
+
+  /**
+   * Parses the given markup in fragment mode.
+   *
+   * @param value the markup to parse
+   * @return the serialized output
+   * @throws RuntimeException if parsing or serialization fails
+   */
+  public static String parse(String value) {
+    return parse(new StringReader(value), true);
+  }
+
+  /**
+   * Parses the stream in fragment mode, decoding its bytes as UTF-8.
+   *
+   * @param in the stream to read the markup from
+   * @return the serialized output
+   * @throws RuntimeException if parsing or serialization fails
+   */
+  public static String parse(InputStream in) {
+    return parse(in, "UTF-8");
+  }
+
+  /**
+   * Parses the stream in fragment mode, decoding its bytes with the named
+   * charset.
+   *
+   * @param in the stream to read the markup from
+   * @param charset name of the charset used to decode the stream
+   * @return the serialized output
+   * @throws RuntimeException if the charset is unsupported or if parsing
+   *         or serialization fails; checked failures are wrapped as the cause
+   */
+  public static String parse(InputStream in, String charset) {
+    try {
+      return parse(new InputStreamReader(in, charset), true);
+    } catch (RuntimeException e) {
+      throw e;
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Parses markup from the reader and returns the serialized result.
+   *
+   * @param in the reader supplying the markup
+   * @param fragment if true the input is parsed as a fragment with "div"
+   *        as the context element; if false it is parsed as a full document
+   * @return the serialized output
+   * @throws RuntimeException if parsing or serialization fails; a checked
+   *         failure is wrapped with its message and attached as the cause
+   */
+  public static String parse(Reader in, boolean fragment) {
+    try {
+      nu.validator.htmlparser.sax.HtmlParser htmlParser =
+        new nu.validator.htmlparser.sax.HtmlParser();
+      htmlParser.setBogusXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
+      htmlParser.setMappingLangToXmlLang(true);
+      htmlParser.setReportingDoctype(false);
+      ByteArrayOutputStream out = new ByteArrayOutputStream();
+      Writer w = new OutputStreamWriter(out, "UTF-8");
+      HtmlSerializer ser = new VoidElementFixHtmlSerializer(w);
+      htmlParser.setContentHandler(ser);
+      htmlParser.setLexicalHandler(ser);
+      if (fragment) {
+        htmlParser.parseFragment(new InputSource(in), "div");
+      } else {
+        htmlParser.parse(new InputSource(in));
+      }
+      // A failed flush would leave the buffer truncated, so let it surface
+      // through the handler below instead of being silently ignored.
+      w.flush();
+      return new String(out.toByteArray(), "UTF-8");
+    } catch (RuntimeException e) {
+      // Same policy as parse(InputStream, String): unchecked failures pass through untouched.
+      throw e;
+    } catch (Exception e) {
+      // Keep the original message but chain the cause so the stack trace is not lost.
+      throw new RuntimeException(e.getMessage(), e);
+    }
+  }
+
+  /**
+   * Serializer that writes an explicit closing tag for void elements before
+   * delegating to the superclass.
+   * NOTE(review): presumably needed so the output is well-formed XML for
+   * callers that embed it as XHTML; confirm against HtmlSerializer.
+   */
+  private static class VoidElementFixHtmlSerializer extends HtmlSerializer {
+    // Must stay sorted: endElement looks names up with Arrays.binarySearch.
+    private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
+      "bgsound", "br", "col", "embed", "frame", "hr", "img", "input",
+      "link", "meta", "param", "spacer", "wbr" };
+    private final Writer writer;
+    public VoidElementFixHtmlSerializer(Writer out) {
+      super(out);
+      this.writer = out;
+    }
+    /**
+     * Writes "&lt;/localName&gt;" to the underlying writer when localName is a
+     * void element, then calls the superclass implementation.
+     *
+     * @throws SAXException wrapping any IOException raised by the writer
+     */
+    @Override
+    public void endElement(
+      String uri,
+      String localName,
+      String name)
+        throws SAXException {
+      if (Arrays.binarySearch(VOID_ELEMENTS, localName) >= 0) {
+        try {
+          writer.write('<');
+          writer.write('/');
+          writer.write(localName);
+          writer.write('>');
+        } catch (IOException e) {
+          throw new SAXException(e);
+        }
+      }
+      super.endElement(uri, localName, name);
+    }
+  }
+}

Added: incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlHelper.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlHelper.java?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlHelper.java (added)
+++ incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlHelper.java Mon Jan 14 15:50:31 2008
@@ -0,0 +1,162 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.ext.html;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.abdera.Abdera;
+import org.apache.abdera.model.Div;
+import org.apache.abdera.model.Document;
+import org.apache.abdera.model.Element;
+import org.apache.abdera.protocol.client.AbderaClient;
+import org.apache.abdera.protocol.client.ClientResponse;
+import org.apache.abdera.util.MimeTypeHelper;
+import org.apache.abdera.util.XmlRestrictedCharReader;
+
+public class HtmlHelper {
+  
+  private HtmlHelper() {}
+  
+  public static Div parse(String value) {
+    return parse(Abdera.getInstance(),value);
+  }
+  
+  public static Div parse(InputStream in) {
+    return parse(Abdera.getInstance(),in);
+  }
+  
+  public static Div parse(InputStream in, String charset) {
+    return parse(Abdera.getInstance(),in,charset);
+  }
+  
+  public static Div parse(Reader in) {
+    return parse(Abdera.getInstance(),in);
+  }
+  
+  public static Div parse(Abdera abdera, String value) {
+    return parse(abdera, new StringReader(value));
+  }
+  
+  public static Div parse(Abdera abdera, InputStream in) {
+    return parse(abdera, in, "UTF-8");
+  }
+  
+  public static Div parse(Abdera abdera, InputStream in, String charset) {
+    try {
+      return parse(abdera, new InputStreamReader(in, charset));
+    } catch (RuntimeException e) {
+      throw e;
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+  public static Div parse(Abdera abdera, Reader in) {
+    String result = null;
+    Div div = abdera.getFactory().newDiv();
+    try {
+      div.setValue(HtmlCleaner.parse(in,true));
+      return div;
+    } catch (Exception e) {
+      // this is a temporary hack. some html really 
+      // can't be parsed successfully. in that case,
+      // we produce something that will likely render
+      // rather ugly. but there's not much else we 
+      // can do
+      if (result != null) div.setText(result);
+      return div;
+    }
+  }
+  
+  public static Document<Element> parseDocument(Reader in) {
+    return parseDocument(Abdera.getInstance(),in);
+  }
+  
+  public static Document<Element> parseDocument(Abdera abdera, Reader in) {
+    return abdera.getParser().parse(new StringReader(HtmlCleaner.parse(in,false)));
+  }
+  
+  /**
+   * This will search the element tree for elements named "link" with a 
+   * rel attribute containing the value of rel and a type attribute containg
+   * the value of type.
+   */
+  public static List<Element> discoverLinks(Element base, String type, String... rel)
{
+    List<Element> results = new ArrayList<Element>();
+    walkElementForLinks(results,base,rel,type);
+    return results;
+  }
+  
+  private static void walkElementForLinks(List<Element> results, Element base, String[]
rel, String type) {
+    if (checkElementForLink(base,rel,type)) results.add(base);
+    for (Element child : base.getElements()) 
+      walkElementForLinks(results,child,rel,type);
+  }
+  
+  private static boolean checkElementForLink(Element base, String[] relvals, String type)
{
+    if (base.getQName().getLocalPart().equalsIgnoreCase("link")) {
+      String relattr = base.getAttributeValue("rel");
+      String typeattr = base.getAttributeValue("type");
+      if (relattr != null) {
+        String[] rels = relattr.split("\\s+");
+        Arrays.sort(rels);
+        for (String rel : relvals) {
+          if (Arrays.binarySearch(rels, rel) < 0) return false;
+        }
+      }
+      if (type != null && typeattr == null) return false;
+      if (type == null && typeattr != null) return true;  // assume possible match
+      if (MimeTypeHelper.isMatch(type, typeattr)) return true;
+    }
+    return false;
+  }
+
+  public static List<Element> discoverLinks( 
+    String uri, 
+    String type, 
+    String... rel) 
+      throws IOException {
+    return discoverLinks(Abdera.getInstance(),uri,type,rel);
+  }
+  
+  public static List<Element> discoverLinks(
+    Abdera abdera, 
+    String uri, 
+    String type, 
+    String... rel) 
+      throws IOException {
+    AbderaClient client = new AbderaClient(abdera);
+    ClientResponse resp = client.get(uri);
+    InputStream in = resp.getInputStream();
+    InputStreamReader r = new InputStreamReader(in);
+    XmlRestrictedCharReader x = new XmlRestrictedCharReader(r);
+    Document<Element> doc = HtmlHelper.parseDocument(x);
+    List<Element> list = 
+      discoverLinks(
+        doc.getRoot(), 
+        type, rel);
+    return list;
+  }
+}

Added: incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParser.java?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParser.java
(added)
+++ incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParser.java
Mon Jan 14 15:50:31 2008
@@ -0,0 +1,68 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.ext.html;
+
+import java.io.Reader;
+
+import org.apache.abdera.Abdera;
+import org.apache.abdera.model.Div;
+import org.apache.abdera.model.Document;
+import org.apache.abdera.model.Element;
+import org.apache.abdera.parser.ParseException;
+import org.apache.abdera.parser.ParserOptions;
+import org.apache.abdera.util.AbstractNamedParser;
+
+/**
+ * Named parser registered under the name "html". Delegates to HtmlHelper
+ * to parse either a complete HTML document or, when the supplied options
+ * request it, an HTML fragment.
+ */
+public class HtmlParser 
+  extends AbstractNamedParser {
+
+  /** Creates a parser with no Abdera instance (passes null to the superclass). */
+  public HtmlParser() {
+    this(null);
+  }
+
+  /** Creates a parser bound to the given Abdera instance. */
+  public HtmlParser(Abdera abdera) {
+    super(abdera,"html");
+  }
+
+  /** Supplies HtmlParserOptions as this parser's default options. */
+  @Override 
+  protected ParserOptions initDefaultParserOptions() {
+    return new HtmlParserOptions();
+  }
+
+  /**
+   * Parses HTML from the reader.
+   * 
+   * @param in      source of the HTML text
+   * @param base    base URI to set on the resulting document; skipped when null
+   * @param options fragment mode is used only when this is an
+   *                HtmlParserOptions whose isHtmlFragment() is true
+   * @return a document whose root is a Div (fragment mode) or the parsed
+   *         document (document mode)
+   */
+  @SuppressWarnings("unchecked") 
+  public <T extends Element> Document<T> parse(
+    Reader in, 
+    String base,
+    ParserOptions options) 
+      throws ParseException {
+    boolean asFragment = 
+      options instanceof HtmlParserOptions &&
+        ((HtmlParserOptions)options).isHtmlFragment();
+    Document<T> result;
+    if (!asFragment) {
+      result = (Document<T>) HtmlHelper.parseDocument(abdera,in);
+    } else {
+      // Wrap the parsed Div in a fresh document so both modes return a Document.
+      Div root = HtmlHelper.parse(abdera,in);
+      result = this.getFactory().newDocument();
+      result.setRoot((T)root);
+    }
+    if (base != null) {
+      result.setBaseUri(base);
+    }
+    return result;
+  }
+ 
+}

Added: incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParserOptions.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParserOptions.java?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParserOptions.java
(added)
+++ incubator/abdera/java/trunk/extensions/html/src/main/java/org/apache/abdera/ext/html/HtmlParserOptions.java
Mon Jan 14 15:50:31 2008
@@ -0,0 +1,50 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.ext.html;
+
+import org.apache.abdera.factory.Factory;
+import org.apache.abdera.i18n.text.Localizer;
+import org.apache.abdera.parser.stax.FOMException;
+import org.apache.abdera.parser.stax.FOMFactory;
+import org.apache.abdera.util.AbstractParserOptions;
+
+/**
+ * Parser options for HtmlParser. Adds a single flag selecting whether the
+ * input is treated as an HTML fragment rather than a complete document.
+ */
+public class HtmlParserOptions 
+  extends AbstractParserOptions {
+
+  // When true the input is parsed as a fragment; defaults to false.
+  private boolean htmlFragment = false;
+
+  /**
+   * Accepts only FOMFactory instances.
+   * 
+   * @throws FOMException if factory is not a FOMFactory
+   */
+  @Override 
+  protected void checkFactory(Factory factory) {
+    if (factory instanceof FOMFactory) {
+      return;
+    }
+    throw new FOMException(
+      Localizer.sprintf("WRONG.PARSER.INSTANCE",FOMFactory.class.getName()));
+  }
+
+  /** Installs a new FOMFactory when no factory has been set yet. */
+  @Override 
+  protected void initFactory() {
+    if (factory == null) {
+      factory = new FOMFactory();
+    }
+  }
+
+  /** Returns true when the input should be parsed as an HTML fragment. */
+  public boolean isHtmlFragment() {
+    return htmlFragment;
+  }
+
+  /** Sets whether the input should be parsed as an HTML fragment. */
+  public void setHtmlFragment(boolean fragment) {
+    this.htmlFragment = fragment;
+  }
+}

Added: incubator/abdera/java/trunk/extensions/html/src/main/resources/META-INF/services/org.apache.abdera.parser.NamedParser
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/html/src/main/resources/META-INF/services/org.apache.abdera.parser.NamedParser?rev=611972&view=auto
==============================================================================
--- incubator/abdera/java/trunk/extensions/html/src/main/resources/META-INF/services/org.apache.abdera.parser.NamedParser
(added)
+++ incubator/abdera/java/trunk/extensions/html/src/main/resources/META-INF/services/org.apache.abdera.parser.NamedParser
Mon Jan 14 15:50:31 2008
@@ -0,0 +1 @@
+org.apache.abdera.ext.html.HtmlParser
\ No newline at end of file



Mime
View raw message