incubator-any23-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ans...@apache.org
Subject svn commit: r1371537 [29/30] - in /incubator/any23/trunk: ./ core/ core/src/test/java/org/apache/any23/ core/src/test/java/org/apache/any23/cli/ core/src/test/java/org/apache/any23/extractor/ core/src/test/java/org/apache/any23/extractor/csv/ core/src/...
Date Fri, 10 Aug 2012 00:40:22 GMT
Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/species/species-example-2.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/species/species-example-2.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/species/species-example-2.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/species/species-example-2.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,159 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html lang="en">
+
+<!-- Uses php -->
+
+<head profile="http://www.ietf.org/rfc/rfc2731.txt http://purl.org/uF/2008/03/ http://www.w3.org/2003/g/data-view">
+
+    <title>West Midland Bird Club: County Lists 2004</title>
+
+</head>
+
+<body>
+
+<table class="infobox biota" style="text-align: left; width: 200px">
+    <tr>
+        <th colspan="2" style="text-align: center; background-color: #90EE90"><i>Magnolia virginiana</i></th>
+    </tr>
+    <tr>
+        <td colspan="2" style="text-align: center"><a href="/wiki/File:Sweetbay1082.jpg" class="image"><img
+                alt="White flower surrounded by long green leaves."
+                src="http://upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Sweetbay1082.jpg/220px-Sweetbay1082.jpg"
+                width="220" height="180"/></a></td>
+    </tr>
+    <tr>
+        <td colspan="2" style="text-align: center; font-size: 88%"><i>Magnolia virginiana</i></td>
+
+    </tr>
+    <tr>
+        <th colspan="2" style="text-align: center; background-color: #90EE90"><a href="/wiki/Biological_classification"
+                                                                                 title="Biological classification">Scientific
+            classification</a></th>
+    </tr>
+    <tr>
+        <td>Kingdom:</td>
+        <td><span class="kingdom"><a href="/wiki/Plant" title="Plant">Plantae</a></span><br/></td>
+    </tr>
+    <tr>
+        <td>Division:</td>
+        <td><a href="/wiki/Angiospermae" title="Angiospermae" class="mw-redirect">Angiospermae</a><br/></td>
+    </tr>
+
+    <tr>
+        <td>(unranked):</td>
+        <td><a href="/wiki/Magnoliid" title="Magnoliid" class="mw-redirect">Magnoliids</a><br/></td>
+    </tr>
+    <tr>
+        <td>Order:</td>
+        <td><span class="order"><a href="/wiki/Magnoliales" title="Magnoliales">Magnoliales</a></span><br/></td>
+    </tr>
+    <tr>
+        <td>Family:</td>
+        <td><span class="family"><a href="/wiki/Magnoliaceae" title="Magnoliaceae">Magnoliaceae</a></span><br/></td>
+
+    </tr>
+    <tr>
+        <td>Genus:</td>
+        <td><span class="genus"><i><a href="/wiki/Magnolia" title="Magnolia">Magnolia</a></i></span><br/></td>
+    </tr>
+    <tr>
+        <td>Species:</td>
+        <td><span style="white-space: nowrap"><i><b>M. virginiana</b></i></span><br/></td>
+    </tr>
+    <tr>
+        <th colspan="2" style="text-align: center; background-color: #90EE90"><a href="/wiki/Binomial_nomenclature"
+                                                                                 title="Binomial nomenclature">Binomial
+            name</a></th>
+    </tr>
+
+    <tr>
+        <td colspan="2" style="text-align: center"><b><span class="binomial"><i>Magnolia virginiana</i></span></b><br/>
+            <small><a href="/wiki/Carl_Linnaeus" title="Carl Linnaeus">L.</a></small>
+        </td>
+    </tr>
+</table>
+<table class="infobox biota" style="text-align: left; width: 200px">
+    <tr>
+        <th colspan="2" style="text-align: center; background-color: #90EE90">Oaks</th>
+    </tr>
+    <tr>
+        <td colspan="2" style="text-align: center"><a href="/wiki/File:Quercus_robur.jpg" class="image"><img
+                alt="Cluster of oak leaves and acorns."
+                src="http://upload.wikimedia.org/wikipedia/commons/thumb/a/af/Quercus_robur.jpg/220px-Quercus_robur.jpg"
+                width="220" height="298"/></a></td>
+    </tr>
+    <tr>
+        <td colspan="2" style="text-align: center; font-size: 88%">Foliage and acorns of the <a
+                href="/wiki/Pedunculate_Oak" title="Pedunculate Oak" class="mw-redirect">Pedunculate Oak</a>, <i>Quercus
+            robur</i></td>
+    </tr>
+
+    <tr>
+        <th colspan="2" style="text-align: center; background-color: #90EE90"><a href="/wiki/Biological_classification"
+                                                                                 title="Biological classification">Scientific
+            classification</a></th>
+    </tr>
+    <tr>
+        <td>Kingdom:</td>
+        <td><span class="kingdom"><a href="/wiki/Plant" title="Plant">Plantae</a></span><br/></td>
+    </tr>
+    <tr>
+        <td>Division:</td>
+        <td><a href="/wiki/Angiospermae" title="Angiospermae" class="mw-redirect">Angiospermae</a><br/></td>
+    </tr>
+    <tr>
+
+        <td>(unranked):</td>
+        <td><a href="/wiki/Eudicots" title="Eudicots">Eudicots</a><br/></td>
+    </tr>
+    <tr>
+        <td>(unranked):</td>
+        <td><a href="/wiki/Rosids" title="Rosids">Rosids</a><br/></td>
+    </tr>
+    <tr>
+        <td>Order:</td>
+        <td><span class="order"><a href="/wiki/Fagales" title="Fagales">Fagales</a></span><br/></td>
+    </tr>
+
+    <tr>
+        <td>Family:</td>
+        <td><span class="family"><a href="/wiki/Fagaceae" title="Fagaceae">Fagaceae</a></span><br/></td>
+    </tr>
+    <tr>
+        <td>Genus:</td>
+        <td><span class="genus"><i><b>Quercus</b></i></span><br/>
+            <small><a href="/wiki/Carl_Linnaeus" title="Carl Linnaeus">L.</a></small>
+        </td>
+    </tr>
+    <tr>
+        <th colspan="2" style="text-align: center; background-color: #90EE90"><a href="/wiki/Species" title="Species">Species</a>
+        </th>
+
+    </tr>
+    <tr>
+        <td colspan="2" style="text-align: left">
+            <p>See <a href="/wiki/List_of_Quercus_species" title="List of Quercus species">List of <i>Quercus</i>
+                species</a>.</p>
+        </td>
+    </tr>
+</table>
+
+
+</body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-iso-8859-1.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-iso-8859-1.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-iso-8859-1.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-iso-8859-1.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,26 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hellö Wörld!</title>
+    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="http://bob.example.com/" rel="me">here</a>.</p>
+  </body>
+</html>

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-iso-8859-1.xhtml
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-iso-8859-1.xhtml?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-iso-8859-1.xhtml (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-iso-8859-1.xhtml Fri Aug 10 00:40:13 2012
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+  <head>
+    <title>Hellö Wörld!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="http://bob.example.com/" rel="me">here</a>.</p>
+  </body>
+</html>

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8-after-title.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8-after-title.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8-after-title.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8-after-title.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,26 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hellö Wörld!</title>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="http://bob.example.com/" rel="me">here</a>.</p>
+  </body>
+</html>

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,26 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+    <title>Hellö Wörld!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="http://bob.example.com/" rel="me">here</a>.</p>
+  </body>
+</html>

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8.xhtml
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8.xhtml?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8.xhtml (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/encoding-utf-8.xhtml Fri Aug 10 00:40:13 2012
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+  <head>
+    <title>Hellö Wörld!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="http://bob.example.com/" rel="me">here</a>.</p>
+  </body>
+</html>

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/me-and-sweetheart.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/me-and-sweetheart.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/me-and-sweetheart.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/me-and-sweetheart.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,26 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="http://bob.example.com/" rel="me sweetheart">here</a>.
+      I'm my own sweetheart! XFN doesn't allow this.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/mixed-case.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/mixed-case.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/mixed-case.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/mixed-case.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <TITLE>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and I'm a friend of <A href="http://alice.example.com/" rel="friend">Alice</a> who is also my sweetheart.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/multiple-friends.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/multiple-friends.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/multiple-friends.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/multiple-friends.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,26 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my friends are <a href="http://alice.example.com/" rel="friend">Alice</a>
+      and <a href="http://charlie.example.com/" rel="friend">Charlie</a>.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/multiple-rel.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/multiple-rel.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/multiple-rel.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/multiple-rel.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and I'm a friend of <a href="http://alice.example.com/" rel="friend sweetheart">Alice</a> who is also my sweetheart.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/no-rel.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/no-rel.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/no-rel.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/no-rel.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and I'm a friend of <a href="http://alice.example.com/">Alice</a>.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/no-valid-rel.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/no-valid-rel.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/no-valid-rel.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/no-valid-rel.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and I'm a compatriot of <a href="http://alice.example.com/" rel="compatriot">Alice</a>.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/simple-friend.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/simple-friend.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/simple-friend.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/simple-friend.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and I'm a friend of <a href="http://alice.example.com/" rel="friend">Alice</a>.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/simple-me.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/simple-me.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/simple-me.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/simple-me.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="http://bob.example.com/" rel="me">here</a>.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/some-links-without-rel.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/some-links-without-rel.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/some-links-without-rel.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/some-links-without-rel.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,26 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my friends are <a href="http://alice.example.com/">Alice</a>
+      and <a href="http://charlie.example.com/" rel="friend">Charlie</a>.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/strip-spaces.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/strip-spaces.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/strip-spaces.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/strip-spaces.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,29 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>        
+    
+Hello World!    
+
+    </title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="http://bob.example.com/" rel="    me   ">here</a>.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/tagsoup.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/tagsoup.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/tagsoup.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/tagsoup.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,20 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<h1>Hello World!</h2>
+I'm Bob and my homepage is <a href="http://bob.example.com/" rel="me">here</a>.
+</body></html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/upcase-href.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/upcase-href.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/upcase-href.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/upcase-href.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <TITLE>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and I'm a friend of <a HREF="http://alice.example.com/" rel="friend">Alice</a> who is also my sweetheart.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/with-relative-uri.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/with-relative-uri.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/with-relative-uri.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/microformats/xfn/with-relative-uri.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Hello World!</title>
+  </head>
+  <body>
+    <h1>Hello World!</h1>
+    <p>I'm Bob and my homepage is <a href="/foo" rel="me">here</a>.</p>
+  </body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/any23-xml-mimetype.xml
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/any23-xml-mimetype.xml?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/any23-xml-mimetype.xml (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/any23-xml-mimetype.xml Fri Aug 10 00:40:13 2012
@@ -0,0 +1,870 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!--
+  Description: This xml file defines the valid mime types used
+  by Tika. The mime types within this file are based on the types in the
+  mime-types.xml file available in Apache Nutch.
+-->
+<mime-info>
+
+    <!-- DEFAULT -->
+    <mime-type type="application/octet-stream">
+        <magic priority="50">
+            <match value="\037\036" type="string" offset="0"/>
+            <match value="017437" type="host16" offset="0"/>
+            <match value="0x1fff" type="host16" offset="0"/>
+            <match value="\377\037" type="string" offset="0"/>
+            <match value="0145405" type="host16" offset="0"/>
+        </magic>
+        <glob pattern="*.bin"/>
+    </mime-type>
+
+    <mime-type type="text/plain">
+        <magic priority="50">
+            <match value="This is TeX," type="string" offset="0"/>
+            <match value="This is METAFONT," type="string" offset="0"/>
+        </magic>
+        <glob pattern="*.txt"/>
+        <glob pattern="*.asc"/>
+        <glob pattern="*.nt"/>
+        <glob pattern="INSTALL"/>
+        <glob pattern="KEYS"/>
+        <glob pattern="Makefile"/>
+        <glob pattern="README"/>
+        <glob pattern="abs-linkmap"/>
+        <glob pattern="abs-menulinks"/>
+    </mime-type>
+
+    <mime-type type="application/xml">
+        <alias type="text/xml"/>
+        <magic priority="50">
+            <match value="&lt;?xml" type="string" offset="0"/>
+            <match value="&lt;?XML" type="string" offset="0"/>
+            <match value="&lt;!--" type="string" offset="0"/>
+            <match value="0xFFFE3C003F0078006D006C00" type="string" offset="0"/>
+            <match value="0xFEFF003C003F0078006D006C" type="string" offset="0"/>
+        </magic>
+        <glob pattern="*.xml"/>
+        <glob pattern="*.xsl"/>
+        <glob pattern="*.xsd"/>
+        <sub-class-of type="text/plain"/>
+    </mime-type>
+
+    <!-- BEGIN: Semantic Web document mime types. -->
+
+    <!-- RSS -->
+    <mime-type type="application/rss+xml">
+        <alias type="text/rss"/>
+        <root-XML localName="rss"/>
+        <root-XML namespaceURI="http://purl.org/rss/1.0/"/>
+        <glob pattern="*.rss"/>
+    </mime-type>
+
+    <mime-type type="text/rdf+n3">
+        <alias type="text/n3"/>
+        <alias type="application/n3"/>
+        <glob pattern="*.n3"/>
+        <magic priority="50">
+            <match value="@prefix" type="string" offset="0:64"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="text/rdf+nq">
+        <alias type="text/nq"/>
+        <alias type="application/nq"/>
+        <glob pattern="*.nq"/>
+    </mime-type>
+
+    <!-- Turtle -->
+    <mime-type type="application/x-turtle">
+        <alias type="text/turtle"/>
+        <alias type="application/turtle"/>
+        <glob pattern="*.ttl"/>
+    </mime-type>
+
+    <!-- RDF -->
+    <mime-type type="application/rdf+xml">
+        <sub-class-of type="application/xml"/>
+        <root-XML localName="RDF"/>
+        <root-XML localName="rdf"/>
+        <root-XML namespaceURI="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>
+        <root-XML namespaceURI="http://purl.org/rss/1.0/"/>
+        <alias type="text/rdf"/>
+        <magic priority="50">
+            <match value="&lt;rdf:RDF" type="string" offset="0:64"/>
+            <match value="&lt;RDF" type="string" offset="0:64"/>
+            <match value="xmlns:rdf" type="string" offset="0:64"/>
+            <match value="*&lt;DOCTYPE rdf:RDF" type="string" offset="0:120"/>
+        </magic>
+        <glob pattern="*.rdf"/>
+        <glob pattern="*.rdfs"/>
+        <glob pattern="*.xrdf"/>
+        <glob pattern="*.owl"/>
+        <glob pattern="*.rdfx"/>
+    </mime-type>
+
+    <!-- ATOM -->
+    <mime-type type="application/atom+xml">
+        <sub-class-of type="application/xml"/>
+        <root-XML localName="feed"/>
+        <root-XML namespaceURI="http://www.w3.org/2005/Atom/" localName="feed"/>
+        <root-XML namespaceURI="http://www.w3.org/2005/Atom"  localName="feed"/>
+    </mime-type>
+
+    <!-- WSDL -->
+    <mime-type type="application/x-wsdl">
+        <sub-class-of type="application/xml"/>
+        <root-XML localName="definitions"/>
+        <root-XML namespaceURI="http://schemas.xmlsoap.org/wsdl/" localName="definitions"/>
+        <glob pattern="*.wsdl"/>
+    </mime-type>
+    <!-- END Semantic Web document mime types. -->
+
+    <mime-type type="text/html">
+        <magic priority="50">
+            <match value="&lt;!DOCTYPE HTML" type="string" offset="0:64"/>
+            <match value="&lt;!doctype html" type="string" offset="0:64"/>
+            <match value="&lt;HEAD" type="string" offset="0:64"/>
+            <match value="&lt;head" type="string" offset="0:64"/>
+            <match value="&lt;TITLE" type="string" offset="0:64"/>
+            <match value="&lt;title" type="string" offset="0:64"/>
+            <match value="&lt;html" type="string" offset="0:64"/>
+            <match value="&lt;HTML" type="string" offset="0:64"/>
+            <match value="&lt;BODY" type="string" offset="0"/>
+            <match value="&lt;body" type="string" offset="0"/>
+            <match value="&lt;TITLE" type="string" offset="0"/>
+            <match value="&lt;title" type="string" offset="0"/>
+            <match value="&lt;!--" type="string" offset="0"/>
+            <match value="&lt;h1" type="string" offset="0"/>
+            <match value="&lt;H1" type="string" offset="0"/>
+            <match value="&lt;!doctype HTML" type="string" offset="0"/>
+            <match value="&lt;!DOCTYPE html" type="string" offset="0"/>
+        </magic>
+        <glob pattern="*.html"/>
+        <glob pattern="*.htm"/>
+    </mime-type>
+
+    <mime-type type="application/xhtml+xml">
+        <sub-class-of type="application/xml"/>
+        <root-XML namespaceURI='http://www.w3.org/1999/xhtml'
+                  localName='html'/>
+        <glob pattern="*.xhtml"/>
+        <magic priority="50">
+            <match value="&lt;!DOCTYPE html PUBLIC &quot;-//W3C//DTD XHTML+RDFa 1.0//EN" type="string" offset="0:64"/>
+        </magic>
+    </mime-type>
+
+    <!-- BEGIN: MS-Office documents -->
+    <mime-type type="application/vnd.ms-powerpoint">
+        <glob pattern="*.ppz"/>
+        <glob pattern="*.ppt"/>
+        <glob pattern="*.pps"/>
+        <glob pattern="*.pot"/>
+        <magic priority="50">
+            <match value="0xcfd0e011" type="little32" offset="0"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.ms-excel">
+        <magic priority="50">
+            <match value="Microsoft Excel 5.0 Worksheet" type="string"
+                   offset="2080"/>
+        </magic>
+        <glob pattern="*.xls"/>
+        <glob pattern="*.xlc"/>
+        <glob pattern="*.xll"/>
+        <glob pattern="*.xlm"/>
+        <glob pattern="*.xlw"/>
+        <glob pattern="*.xla"/>
+        <glob pattern="*.xlt"/>
+        <glob pattern="*.xld"/>
+        <alias type="application/msexcel"/>
+    </mime-type>
+
+    <mime-type type="application/msword">
+        <magic priority="50">
+            <match value="\x31\xbe\x00\x00" type="string" offset="0"/>
+            <match value="PO^Q`" type="string" offset="0"/>
+            <match value="\376\067\0\043" type="string" offset="0"/>
+            <match value="\333\245-\0\0\0" type="string" offset="0"/>
+            <match value="Microsoft Word 6.0 Document" type="string"
+                   offset="2080"/>
+            <match value="Microsoft Word document data" type="string"
+                   offset="2112"/>
+        </magic>
+        <glob pattern="*.doc"/>
+        <alias type="application/vnd.ms-word"/>
+    </mime-type>
+    <!-- END: MS-Office documents -->
+
+    <!--
+         =====================================================================
+         Open Document Format for Office Applications (OpenDocument) v1.0
+         http://www.oasis-open.org/specs/index.php#opendocumentv1.0
+         =====================================================================
+     -->
+
+    <mime-type type="application/vnd.oasis.opendocument.text">
+        <comment>OpenDocument v1.0: Text document</comment>
+        <alias type="application/x-vnd.oasis.opendocument.text"/>
+        <glob pattern="*.odt"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.text"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.text-template">
+        <comment>OpenDocument v1.0: Text document used as template
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.text-template"/>
+        <glob pattern="*.ott"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.text-template"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.graphics">
+        <comment>OpenDocument v1.0: Graphics document (Drawing)
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.graphics"/>
+        <glob pattern="*.odg"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.graphics"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.graphics-template">
+        <comment>OpenDocument v1.0: Graphics document used as
+            template
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.graphics-template"/>
+        <glob pattern="*.otg"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.graphics-template"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.presentation">
+        <comment>OpenDocument v1.0: Presentation document
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.presentation"/>
+        <glob pattern="*.odp"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.presentation"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.presentation-template">
+        <comment>OpenDocument v1.0: Presentation document used as
+            template
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.presentation-template"/>
+        <glob pattern="*.otp"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.presentation-template"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.spreadsheet">
+        <comment>OpenDocument v1.0: Spreadsheet document</comment>
+        <alias type="application/x-vnd.oasis.opendocument.spreadsheet"/>
+        <glob pattern="*.ods"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.spreadsheet"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.spreadsheet-template">
+        <comment>OpenDocument v1.0: Spreadsheet document used as
+            template
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.spreadsheet-template"/>
+        <glob pattern="*.ots"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.chart">
+        <comment>OpenDocument v1.0: Chart document</comment>
+        <alias type="application/x-vnd.oasis.opendocument.chart"/>
+        <glob pattern="*.odc"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.chart"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.chart-template">
+        <comment>OpenDocument v1.0: Chart document used as
+            template
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.chart-template"/>
+        <glob pattern="*.otc"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.chart-template"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.image">
+        <comment>OpenDocument v1.0: Image document</comment>
+        <alias type="application/x-vnd.oasis.opendocument.image"/>
+        <glob pattern="*.odi"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.image"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.image-template">
+        <comment>OpenDocument v1.0: Image document used as
+            template
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.image-template"/>
+        <glob pattern="*.oti"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.image-template"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.formula">
+        <comment>OpenDocument v1.0: Formula document</comment>
+        <alias type="application/x-vnd.oasis.opendocument.formula"/>
+        <glob pattern="*.odf"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.formula"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.formula-template">
+        <comment>OpenDocument v1.0: Formula document used as
+            template
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.formula-template"/>
+        <glob pattern="*.otf"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.formula-template"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.text-master">
+        <comment>OpenDocument v1.0: Global Text document</comment>
+        <alias type="application/x-vnd.oasis.opendocument.text-master"/>
+        <glob pattern="*.odm"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.text-master"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.oasis.opendocument.text-web">
+        <comment>OpenDocument v1.0: Text document used as template
+            for HTML documents
+        </comment>
+        <alias type="application/x-vnd.oasis.opendocument.text-web"/>
+        <glob pattern="*.oth"/>
+        <magic>
+            <match type="string" offset="0" value="PK">
+                <match type="string" offset="30"
+                       value="mimetypeapplication/vnd.oasis.opendocument.text-web"/>
+            </match>
+        </magic>
+    </mime-type>
+
+    <!-- BEGIN: OSX Specific -->
+    <mime-type type="application/mac-binhex40">
+        <glob pattern="*.hqx"/>
+    </mime-type>
+
+    <mime-type type="application/mac-compactpro">
+        <glob pattern="*.cpt"/>
+    </mime-type>
+    <!-- END: OSX Specific -->
+
+    <mime-type type="application/rtf">
+        <glob pattern="*.rtf"/>
+        <alias type="text/rtf"/>
+    </mime-type>
+
+    <mime-type type="application/pdf">
+        <magic priority="50">
+            <match value="%PDF-" type="string" offset="0"/>
+        </magic>
+        <glob pattern="*.pdf"/>
+        <alias type="application/x-pdf"/>
+    </mime-type>
+
+    <mime-type type="application/x-mif">
+        <alias type="application/vnd.mif"/>
+    </mime-type>
+
+    <mime-type type="application/vnd.wap.wbxml">
+        <glob pattern="*.wbxml"/>
+    </mime-type>
+
+    <mime-type type="application/vnd.wap.wmlc">
+        <_comment>Compiled WML Document</_comment>
+        <glob pattern="*.wmlc"/>
+    </mime-type>
+
+    <mime-type type="application/vnd.wap.wmlscriptc">
+        <_comment>Compiled WML Script</_comment>
+        <glob pattern="*.wmlsc"/>
+    </mime-type>
+
+    <mime-type type="text/vnd.wap.wmlscript">
+        <_comment>WML Script</_comment>
+        <glob pattern="*.wmls"/>
+    </mime-type>
+
+    <mime-type type="application/x-bzip">
+        <alias type="application/x-bzip2"/>
+    </mime-type>
+
+    <mime-type type="application/x-bzip-compressed-tar">
+        <glob pattern="*.tbz"/>
+        <glob pattern="*.tbz2"/>
+    </mime-type>
+
+    <mime-type type="application/x-cdlink">
+        <_comment>Virtual CD-ROM CD Image File</_comment>
+        <glob pattern="*.vcd"/>
+    </mime-type>
+
+    <mime-type type="application/x-director">
+        <_comment>Shockwave Movie</_comment>
+        <glob pattern="*.dcr"/>
+        <glob pattern="*.dir"/>
+        <glob pattern="*.dxr"/>
+    </mime-type>
+
+    <mime-type type="application/x-futuresplash">
+        <_comment>Macromedia FutureSplash File</_comment>
+        <glob pattern="*.spl"/>
+    </mime-type>
+
+    <mime-type type="application/x-java">
+        <alias type="application/java"/>
+    </mime-type>
+
+    <mime-type type="application/x-koan">
+        <_comment>SSEYO Koan File</_comment>
+        <glob pattern="*.skp"/>
+        <glob pattern="*.skd"/>
+        <glob pattern="*.skt"/>
+        <glob pattern="*.skm"/>
+    </mime-type>
+
+    <mime-type type="application/x-latex">
+        <_comment>LaTeX Source Document</_comment>
+        <glob pattern="*.latex"/>
+    </mime-type>
+
+    <mime-type type="application/x-ms-dos-executable">
+        <alias type="application/x-dosexec"/>
+    </mime-type>
+
+    <mime-type type="application/ogg">
+        <alias type="application/x-ogg"/>
+    </mime-type>
+
+    <mime-type type="application/x-rar">
+        <alias type="application/x-rar-compressed"/>
+    </mime-type>
+
+    <mime-type type="application/x-shellscript">
+        <alias type="application/x-sh"/>
+    </mime-type>
+
+    <mime-type type="audio/midi">
+        <glob pattern="*.kar"/>
+    </mime-type>
+
+    <mime-type type="audio/x-pn-realaudio">
+        <alias type="audio/x-realaudio"/>
+    </mime-type>
+
+    <mime-type type="image/tiff">
+        <magic priority="50">
+            <match value="0x4d4d2a00" type="string" offset="0"/>
+            <match value="0x49492a00" type="string" offset="0"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="message/rfc822">
+        <magic priority="50">
+            <match type="string" value="Relay-Version:" offset="0"/>
+            <match type="string" value="#! rnews" offset="0"/>
+            <match type="string" value="N#! rnews" offset="0"/>
+            <match type="string" value="Forward to" offset="0"/>
+            <match type="string" value="Pipe to" offset="0"/>
+            <match type="string" value="Return-Path:" offset="0"/>
+            <match type="string" value="From:" offset="0"/>
+            <match type="string" value="Message-ID:" offset="0"/>
+            <match type="string" value="Date:" offset="0"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="image/vnd.wap.wbmp">
+        <_comment>Wireless Bitmap File Format</_comment>
+        <glob pattern="*.wbmp"/>
+    </mime-type>
+
+    <mime-type type="image/x-psd">
+        <alias type="image/photoshop"/>
+    </mime-type>
+
+    <mime-type type="image/x-xcf">
+        <alias type="image/xcf"/>
+        <magic priority="50">
+            <match type="string" value="gimp xcf " offset="0"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="model/iges">
+        <_comment>
+            Initial Graphics Exchange Specification Format
+        </_comment>
+        <glob pattern="*.igs"/>
+        <glob pattern="*.iges"/>
+    </mime-type>
+
+    <mime-type type="model/mesh">
+        <glob pattern="*.msh"/>
+        <glob pattern="*.mesh"/>
+        <glob pattern="*.silo"/>
+    </mime-type>
+
+    <mime-type type="model/vrml">
+        <glob pattern="*.vrml"/>
+    </mime-type>
+
+    <mime-type type="text/x-tcl">
+        <alias type="application/x-tcl"/>
+    </mime-type>
+
+    <mime-type type="text/x-tex">
+        <alias type="application/x-tex"/>
+    </mime-type>
+
+    <mime-type type="text/x-texinfo">
+        <alias type="application/x-texinfo"/>
+    </mime-type>
+
+    <mime-type type="text/x-troff-me">
+        <alias type="application/x-troff-me"/>
+    </mime-type>
+
+    <mime-type type="video/vnd.mpegurl">
+        <glob pattern="*.mxu"/>
+    </mime-type>
+
+    <mime-type type="x-conference/x-cooltalk">
+        <_comment>Cooltalk Audio</_comment>
+        <glob pattern="*.ice"/>
+    </mime-type>
+
+    <!-- ===================================================================== -->
+    <!-- TIKA-85: http://www.apache.org/dev/svn-eol-style.txt                  -->
+    <!-- ===================================================================== -->
+
+    <mime-type type="image/x-icon">
+        <glob pattern="*.ico"/>
+    </mime-type>
+
+    <mime-type type="image/jpeg">
+        <glob pattern="*.jpg"/>
+    </mime-type>
+
+    <mime-type type="image/png">
+        <glob pattern="*.png"/>
+    </mime-type>
+
+    <mime-type type="application/zip">
+        <glob pattern="*.zip"/>
+        <magic priority="50">
+            <match type="string" value="PK" offset="0:2"/>
+        </magic>
+    </mime-type>
+
+
+    <mime-type type="application/gzip">
+        <glob pattern="*.gz"/>
+        <glob pattern="*.tgz"/>
+        <glob pattern="*.gzip"/>
+        <!--
+          <magic priority="50">
+              <match type="host16" value="1f 8b 08" offset="0:2" />
+          </magic>
+        -->
+    </mime-type>
+
+     <!--
+     <description>
+     <mimeType>application/x-compress">
+        <extensions>z" />
+        <magicNumber encoding="hex">1f 9d 90</magicNumber>
+     </mime-type>
+
+     <description>
+         <mimeType>application/bzip2">
+         <extensions>bz2,tbz2" />
+         <magicNumber encoding="hex">42 5a 68 39 31</magicNumber>
+     </mime-type>
+
+     <description>
+         <mimeType>application/x-tar">
+         <magicNumber encoding="string" offset="257">ustar</magicNumber>
+         <extensions>tar" />
+     </mime-type>
+
+     <description>
+         <mimeType>application/x-rar-compressed">
+         <extensions>rar" />
+         <magicNumber encoding="hex">52 61 72 21 1a</magicNumber>
+     </mime-type>
+
+     <description>
+         <mimeType>application/stuffit">
+         <extensions>sit" />
+         <magicNumber encoding="string">SIT!</magicNumber>
+     </mime-type>
+
+     <description>
+         <mimeType>application/binhex">
+         <extensions>hqx" />
+     </mime-type>
+     -->
+
+    <mime-type type="audio/basic">
+        <glob pattern="*.au"/>
+        <glob pattern="*.snd"/>
+    </mime-type>
+
+    <mime-type type="video/x-ms-asf">
+        <glob pattern="*.asf"/>
+        <magicNumber encoding="hex">30 26 b2 75 8e 66 cf 11 a6 d9 00 aa 00 62 ce 6c</magicNumber>
+    </mime-type>
+
+    <mime-type type="video/x-ms-asx">
+        <glob pattern="*.asx"/>
+        <magic priority="50">
+            <match type="string" value="&lt;asx" offset="0:64"/>
+            <match type="string" value="&lt;ASX" offset="0:64"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="audio/x-ms-wax">
+        <glob pattern="*.wax"/>
+    </mime-type>
+
+    <mime-type type="video/x-ms-wvx">
+        <glob pattern="*.wvx"/>
+    </mime-type>
+
+    <mime-type type="video/x-ms-wmx">
+        <glob pattern="*.wmx"/>
+    </mime-type>
+
+    <mime-type type="video/x-msvideo">
+        <glob pattern="*.avi"/>
+        <!--	<magicNumber encoding="hex" offset="8">41 56 49 20</magicNumber> -->
+    </mime-type>
+
+
+    <!--
+          This MIME type was invented to let the wmv and wma media types
+          share the magic number. Differentiation between these two files
+          is only possible based on file name extension.
+    -->
+
+    <mime-type type="application/x-ms-wm">
+        <magicNumber encoding="hex">30 26 b2 75 8e 66 cf 11 a6 d9 00 aa 00 62 ce 6c</magicNumber>
+    </mime-type>
+
+    <mime-type type="audio/x-ms-wma">
+        <sub-class-of type="application/x-ms-wm"/>
+        <glob pattern="*.wma"/>
+    </mime-type>
+
+    <mime-type type="video/x-ms-wmv">
+        <sub-class-of type="application/x-ms-wm"/>
+        <glob pattern="*.wmv"/>
+        <glob pattern="*.wm"/>
+    </mime-type>
+
+    <mime-type type="video/quicktime">
+        <glob pattern="*.mov"/>
+        <!--  <magicNumber encoding="string" offset="4">moov</magicNumber> -->
+    </mime-type>
+
+    <mime-type type="video/mpeg">
+        <glob pattern="*.mpg"/>
+        <glob pattern="*.mpeg"/>
+        <!--
+      <magicNumber encoding="hex">00 00 01 b3</magicNumber>
+      <magicNumber encoding="hex">00 00 01 ba</magicNumber> -->
+    </mime-type>
+
+    <mime-type type="application/x-shockwave-flash">
+        <glob pattern="*.swf"/>
+        <!--  <magicNumber encoding="hex">46 57 53</magicNumber>-->
+    </mime-type>
+
+    <mime-type type="application/x-ogg">
+        <glob pattern="*.ogg"/>
+        <magic priority="50">
+            <match type="string" value="OggS" offset="0:64"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/vnd.rn-realmedia">
+        <glob pattern="*.rm"/>
+        <glob pattern="*.ram"/>
+        <magic priority="50">
+            <match type="string" value=".RMF" offset="0:64"/>
+            <match type="string" value="rtsp://" offset="0:64"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="audio/x-wav">
+        <glob pattern="*.wav"/>
+        <magic priority="50">
+            <match type="string" value="WAVE" offset="0:64"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="audio/mpeg">
+        <glob pattern="*.mp3"/>
+        <glob pattern="*.mp2"/>
+        <magic priority="50">
+            <match type="string" value="ID3" offset="0:64"/>
+        </magic>
+
+    </mime-type>
+
+    <mime-type type="audio/midi">
+        <glob pattern="*.mid"/>
+        <glob pattern="*.midi"/>
+        <glob pattern="*.rmi"/>
+        <magic priority="50">
+            <match type="string" value="MThd" offset="0:64"/>
+            <match type="string" value="RMI" offset="0:8"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="video/x-msvideo">
+        <glob pattern="*.avi"/>
+        <!-- 	<magicNumber encoding="hex" offset="8">41 56 49 20</magicNumber> -->
+    </mime-type>
+
+    <mime-type type="video/mp4">
+        <glob pattern="*.mp4"/>
+        <glob pattern="*.mpg4"/>
+        <glob pattern="*.m4v"/>
+        <glob pattern="*.mp4v"/>
+        <glob pattern="*.divx"/>
+        <glob pattern="*.xvid"/>
+        <glob pattern="*.264"/>
+    </mime-type>
+
+    <mime-type type="audio/mp4">
+        <glob pattern="*.m4a"/>
+        <glob pattern="*.m4p"/>
+    </mime-type>
+
+    <mime-type type="video/3gpp">
+        <glob pattern="*.3gp"/>
+        <glob pattern="*.3g2"/>
+    </mime-type>
+
+    <mime-type type="audio/x-aiff">
+        <glob pattern="*.aiff"/>
+        <glob pattern="*.aif"/>
+        <glob pattern="*.aifc"/>
+        <glob pattern="*.aiff"/>
+        <magic priority="50">
+            <match type="string" value="FORM" offset="0:64"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="application/x-ms-wmd">
+        <sub-class-of type="application/zip"/>
+        <glob pattern="*.wmd"/>
+    </mime-type>
+
+    <mime-type type="video/x-flv">
+        <glob pattern="*.flv"/>
+        <magic priority="50">
+            <match type="string" value="FLV" offset="0:64"/>
+        </magic>
+    </mime-type>
+
+    <mime-type type="audio/flac">
+        <glob pattern="*.flac"/>
+        <!--<magicNumber encoding="hex">66 4c 61 43 00 00 00 22</magicNumber>-->
+    </mime-type>
+
+    <mime-type type="application/smil">
+        <glob pattern="*.smi"/>
+        <glob pattern="*.smil"/>
+    </mime-type>
+</mime-info>

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-comma.csv
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-comma.csv?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-comma.csv (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-comma.csv Fri Aug 10 00:40:13 2012
@@ -0,0 +1,4 @@
+first name, last name, http://xmlns.org/foaf/01/knows, age
+Davide, Palmisano, http://michelemostarda.com, 30, value should not appear
+Michele, Mostarda, http://g1o.net,
+Giovanni, Tummarello, http://twitter.com/cygri,
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-semicolon.csv
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-semicolon.csv?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-semicolon.csv (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-semicolon.csv Fri Aug 10 00:40:13 2012
@@ -0,0 +1,4 @@
+first name; last name; http://xmlns.org/foaf/01/knows; age
+Davide; Palmisano; http://michelemostarda.com; 30; value should not appear
+Michele; Mostarda; http://g1o.net;
+Giovanni; Tummarello; http://twitter.com/cygri;
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-tab.csv
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-tab.csv?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-tab.csv (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-tab.csv Fri Aug 10 00:40:13 2012
@@ -0,0 +1,4 @@
+first name	 last name	 http://xmlns.org/foaf/01/knows	 age
+Davide	 Palmisano	 http://michelemostarda.com	 30	 value should not appear
+Michele	 Mostarda	 http://g1o.net	
+Giovanni	 Tummarello	 http://twitter.com/cygri	
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-type.csv
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-type.csv?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-type.csv (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/csv/test-type.csv Fri Aug 10 00:40:13 2012
@@ -0,0 +1,4 @@
+fieldname,fieldvalue
+k1,5.2
+k2,7.9
+k3,10
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/rdf/testMalformedLiteral
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/rdf/testMalformedLiteral?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/rdf/testMalformedLiteral (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/rdf/testMalformedLiteral Fri Aug 10 00:40:13 2012
@@ -0,0 +1,20 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix dc:  <http://purl.org/dc/terms/> .
+<> dc:date "2009-12-31"^^xsd:dateTime .

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/xpath/xpathextractor-test.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/xpath/xpathextractor-test.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/xpath/xpathextractor-test.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/extractor/xpath/xpathextractor-test.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,24 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+</head>
+<body>
+ <div>value1</div>
+ <div>http://test.dom/uri</div>
+</body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/invalid-rdfa-about.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/invalid-rdfa-about.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/invalid-rdfa-about.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/invalid-rdfa-about.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,67 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xml:lang="en" version="XHTML+RDFa 1.0" xmlns:ni="http://iptc.org/std/nar/2006-10-01/" xmlns:okkam="http://models.okkam.org/ENS-core-vocabulary.owl#" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:ansa="http://bdm.ansa.it/NewsML-G2/ns/content" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:v="http://rdf.data-vocabulary.org/#" xmlns:dc="http://purl.org/dc/terms/" xmlns:ninitf="http://iptc.org/std/NITF/2006-10-18/" xmlns="http://www.w3.org/1999/xhtml">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><meta http-equiv="Content-Language" content="it"/><title>Omicidio Desio: fermati i due figli - ANSA.it</title>
+<link rel="stylesheet" type="text/css" href="/web/css/0226171412_style_base.css" media="screen, projection" />
+<link rel="stylesheet" type="text/css" href="/web/css/0226171412_typo_base.css" media="screen, projection" />
+<link rel="stylesheet" type="text/css" href="/web/css/0226171412_style_all.css" media="screen, projection" />
+<link rel="stylesheet" type="text/css" href="/web/css/0226171412_typo_all.css" media="screen, projection" />
+<link rel="stylesheet" type="text/css" href="/web/css/print.css" media="print" />
+</head>
+<body id="www-ansa-it" class="pg-article">
+<div class="pg" id="businesswire-area">
+<div class="bk">
+<div class="corpo" id="content-corpo">(<a href="javascript:popUp('http://www.okkam.org/ens/idb496e0cf-c1d4-46d9-a23d-b002db066009','Ansa')">Ansa</a>) - <a href="javascript:popUp('http://www.okkam.org/ens/id3b243573-2fa4-4460-af72-82827b084ab8','Monza')">Monza</a> - 26 FEB - Fermati i due figli di 16 e 18 anni
+dell'operaio di 44 anni ucciso mercoledì a <a href="javascript:popUp('http://www.okkam.org/ens/id9589a03f-65ee-4a3f-a716-b49c1ad1b338','Desio')">Desio</a> con tre colpi
+di pistola nella sua abitazione. Per il momento non si parla di
+premeditazione nell'ipotesi di omicidio che riguarda i due
+ragazzi. Non si esclude che tra il padre e figli sia scoppiata
+una lite all'ora di cena. È ancora al vaglio degli
+investigatori la posizione della madre dei due fermati. Da tempo
+in casa regnava un'atmosfera pesante a causa delle difficoltà
+economiche.<h1 style="margin: 10px 0 0">Entity section</h1>
+<p>
+<span about="#fake1" rel="dc:subject" resource="http://www.okkam.org/ens/idb496e0cf-c1d4-46d9-a23d-b002db066009"></span><a href="javascript:popUp('http://www.okkam.org/ens/idb496e0cf-c1d4-46d9-a23d-b002db066009','Ansa')"><span typeof="v:Organization" about="http://www.okkam.org/ens/idb496e0cf-c1d4-46d9-a23d-b002db066009"><span property="v:name">Ansa</span></span></a>
+<br>
+<span about="fake2" rel="dc:subject" resource="http://www.okkam.org/ens/id9589a03f-65ee-4a3f-a716-b49c1ad1b338"></span><a href="javascript:popUp('http://www.okkam.org/ens/id9589a03f-65ee-4a3f-a716-b49c1ad1b338','Desio')"><span typeof="okkam:location" about="http://www.okkam.org/ens/id9589a03f-65ee-4a3f-a716-b49c1ad1b338"><span property="v:name">Desio</span></span></a>
+<br>
+<span about="/fake3" rel="dc:subject" resource="http://www.okkam.org/ens/id3b243573-2fa4-4460-af72-82827b084ab8"></span><a href="javascript:popUp('http://www.okkam.org/ens/id3b243573-2fa4-4460-af72-82827b084ab8','Monza')"><span typeof="okkam:location" about="http://www.okkam.org/ens/id3b243573-2fa4-4460-af72-82827b084ab8"><span property="v:name">Monza</span></span></a>
+<br>
+</p>
+</div>
+</div>
+<script type="text/javascript">showImage()</script>
+<!-- 4w_net_content_stop -->
+
+<div class="tools-bar">
+  <ul class="utils">
+    <li class="b-indietro"><a href="javascript:window.history.back();">Indietro</a></li>
+    <li class="b-home"><a href="/">Home</a></li>
+  </ul>
+  <dl class="social-services">
+    <dt>condividi:</dt>
+    <dd>
+	<script type="text/javascript" src="/web/js/share.js"></script>
+
+    </dd>
+  </dl>
+</div>
+</div>
+<!-- only for compatibility -->
+</body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/meta-name-misuse.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/meta-name-misuse.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/meta-name-misuse.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/meta-name-misuse.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns:og="http://opengraphprotocol.org/schema/"
+      xmlns:fb="http://www.facebook.com/2008/fbml">
+<head>
+<meta http-equiv="X-UA-Compatible" content="IE=EmulateIE7" />
+<meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
+<script type="text/javascript">var IMDbTimer={starttime: new Date().getTime()};</script>
+<title>Avatar (2009)</title>
+<link rel="canonical" href="http://www.imdb.com/title/tt0499549/" />
+
+<meta name="title" content="Avatar (2009)">
+<meta name="description" content="Directed by James Cameron.  With Sam Worthington, Zoe Saldana, Sigourney Weaver. A paraplegic marine dispatched to the moon Pandora on a unique mission becomes torn between following his orders and protecting the world he feels is his home. Visit IMDb for Photos, Showtimes, Cast, Crew, Reviews, Plot Summary, Comments, Discussions, Taglines, Trailers, Posters, Fan Sites">
+<link rel="image_src" href="http://ia.media-imdb.com/images/M/MV5BMTA3MzcxNTI2MjNeQTJeQWpwZ15BbWU3MDYwMTc0MzM@._V1._SX100_SY122_.jpg">
+<meta name="keywords" content="Reviews, Showtimes, DVDs, Photos, Message Boards, User Ratings, Synopsis, Trailers, Credits">
+<link rel="stylesheet" type="text/css" media="screen" href="http://i.media-imdb.com/images/SFce97a0af42d1a0706ffaf0e2a3905a42/css2/consumersite.css" />
+<link rel="icon" href="http://i.media-imdb.com/favicon.ico" />
+<link rel="apple-touch-icon" href="http://i.media-media-imdb.com/apple-touch-icon.png" />
+<style type="text/css">.showtimes { font-family: Arial, Helvetica, sans-serif }.showtimes .heading { font-size: 16px; font-weight: bold }.showtimes .time { color: #ff0000 }.tabular { border-collapse: collapse; border: 1px solid #9999ff }.tabular td.heading { background: #bbbbff }.tabular td.heading-right { background: #bbbbff; text-align: right; font-size: small }.tabular td.address { font-size: small; color: #666666; background: #eeeeee }.tabular td.detail { font-size: small; background: #eeeeee }.tabular tr.alternate { background: #eeeeee }.tabular td.item { border: 1px solid #9999ff }</style><link rel="stylesheet" type="text/css" href="http://i.media-imdb.com/images/SF1333771215c45752eab33f6d5dc24a48/tn15/tn15.css" /><link rel="stylesheet" type="text/css" href="http://i.media-imdb.com/images/SFe09f631b4c07d9b145776608ba765d79/css2/site/consumer-vote.css" /><script type="text/javascript" src="http://i.media-imdb.com/images/SF72867cac47f3d7e883837477537ab0a1/js/vote.js" /></script><script type="text/javascript">var __vkey = 'Ge965b-DHv1DDDJgpbfUqAB.qjlz-Vo5c.rJajcuGTowfhqZ4-laOXMaWWrzSikKdzn6mmBpWjlgaVoJIDkqCX';var __vrtg = '8.4';</script>
+<meta name="object_type" content="movie" />
+<meta name="fb_app_id" content="115109575169727" />
+<meta name="og:title" content="Avatar (2009)" />
+<meta name="og:site_name" content="IMDb" />
+<link rel="stylesheet" type="text/css" media="screen" href="http://i.media-imdb.com/images/SF8a1b7906fdc52a345e61b3d4599feaf7/wheel/base.css" />
+<link rel="stylesheet" type="text/css" media="screen" href="http://i.media-imdb.com/images/SFa9e2b92f430fe0118df1c87238b828ac/wheel/widgets.css" />
+<link rel="stylesheet" type="text/css" media="screen" href="http://i.media-imdb.com/images/SF30776d9ce9eaf446e614d17344ca59a7/wheel/layout.css" />
+<!--[if IE]>
+<link rel="stylesheet" type="text/css" media="screen" href="http://i.media-imdb.com/images/SF6c486df5f1b533d141b118846b74341b/wheel/ie.css" />
+<![endif]-->
+<link rel="stylesheet" type="text/css" href="http://i.media-imdb.com/images/SF8604b262713e5abbbc2206716da5b12e/wheel/fixed.css" />
+<!-- start m/s/a/_g_a_s , head -->
+<script type="text/javascript" src="http://i.media-imdb.com/images/SF8b827f9cfd8f5ee8cebba5a6399960ff/a/js/ads.js"></script>
+<script type="text/javascript">
+    generic.monitoring.set_twilight_info(
+        "title",
+        "IT",
+        "f197c615caa2a3ef679e32eee053888894f0f58d",
+        "2010-05-05T14%3A43%3A08GMT",
+        "http://s.media-imdb.com/twilight/?");
+
+    generic.monitoring.start_timing("page_load");
+
+    generic.cache.set('aan',{
+        type:"js",
+        url:("http://aan.amazon.com/2009-05-01/imdb/default?slot=sitewide-iframe&ord=[CLIENT_SIDE_ORD]").replace(ad_utils.ord_regex, ad_utils.ord),
+        onCall:custom.amazon.aan_iframe_oncall
+    });
+
+    generic.cache.set('fc',{
+        type:"swf",
+        url:"http://ia.media-imdb.com/images/M/MV5BMjE3MDUyNTMyN15BMl5Bc3dmXkFtZTcwNDk1MjUxMw@@._V1_.swf"
+    });
+</script>
+<!-- end m/s/a/_g_a_s , head -->
+<link rel="stylesheet" type="text/css" media="screen" href="http://i.media-imdb.com/images/SF1022b258dc6fa98eb1b5a429f5f6118d/css2/app/social/sharing.css" ><link rel="stylesheet" type="text/css" media="screen" href="http://i.media-imdb.com/images/SF7d91268275a182d03eeca60bce078e40/css2/app/social/like.css" >
+</head>
+
+<body bgcolor="#ffffff" text="#000000" id="styleguide-v2" class="fixed">
+</body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/missing-og-namespace.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/missing-og-namespace.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/missing-og-namespace.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/org/apache/any23/validator/missing-og-namespace.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,46 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+    <title>Pictures at an Exhibition: The G� - Evgeny Kissin - Spotify</title>
+    <meta name="title" content="Pictures at an Exhibition: The G� - Evgeny Kissin" />
+    <meta name="medium" content="audio" />
+    <meta name="description" content="This is a link to Pictures at an Exhibition: The G� - Evgeny Kissin. Don't have Spotify? Sign-up for unlimited music with Spotify Premium at Spotify.com." />
+    <meta property="og:image" content="http://open.spotify.com/thumb/72b4f7bbf6e4971e4ad5e39f082320c3e512fd55" />
+    <meta property="og:title" content="Pictures at an Exhibition: The G� - Evgeny Kissin" />
+    <meta property="og:type" content="song" />
+    <meta property="og:site_name" content="Spotify" />
+    <meta property="og:url" content="http://open.spotify.com/track/3buSw8cWZThAO864onWXwp" />
+    <meta property="fb:app_id" content="174829003346" />
+    <link rel="shortcut icon" href="/favicon.ico" type="image/vnd.microsoft.icon" />
+    <link rel="audio_src" href="spotify:track:3buSw8cWZThAO864onWXwp" />
+    <link rel="image_src" href="http://open.spotify.com/thumb/72b4f7bbf6e4971e4ad5e39f082320c3e512fd55" />
+    <link rel="stylesheet" media="screen" type="text/css" href="/design/style.css"/>
+
+    <script type="text/javascript">
+    function toggle_more() {
+      document.getElementById('more-artists').style.display = 'block';
+      document.getElementById('more').innerHTML = ',';
+      document.getElementById('more').style.margin = '0';
+    };
+    </script>
+</head>
+  <body>
+  </body>
+</html>
\ No newline at end of file



Mime
View raw message