nutch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sna...@apache.org
Subject svn commit: r1716573 - in /nutch/trunk: ./ ivy/ src/plugin/parse-tika/ src/plugin/protocol-http/jsp/ src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/
Date Thu, 26 Nov 2015 07:41:02 GMT
Author: snagel
Date: Thu Nov 26 07:41:02 2015
New Revision: 1716573

URL: http://svn.apache.org/viewvc?rev=1716573&view=rev
Log:
NUTCH-2158 Upgrade to Tika 1.11

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/ivy/ivy.xml
    nutch/trunk/src/plugin/parse-tika/ivy.xml
    nutch/trunk/src/plugin/parse-tika/plugin.xml
    nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp
    nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp
    nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp
    nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp
    nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Nov 26 07:41:02 2015
@@ -3,6 +3,8 @@ Nutch Change Log
 Nutch 1.11 Release 25/10/2015 (dd/mm/yyyy)
 Release Report: http://s.apache.org/nutch11
 
+* NUTCH-2158 Upgrade to Tika 1.11 (jnioche, snagel)
+
 * NUTCH-2175 Typos in property descriptions in nutch-default.xml (Roannel Fernández Hernández via snagel)
 
 * NUTCH-2069 Ignore external links based on domain (jnioche)

Modified: nutch/trunk/ivy/ivy.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/ivy/ivy.xml?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/ivy/ivy.xml (original)
+++ nutch/trunk/ivy/ivy.xml Thu Nov 26 07:41:02 2015
@@ -64,7 +64,7 @@
         <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="2.4.0" conf="*->default"/>
         <!-- End of Hadoop Dependencies -->
 
-		<dependency org="org.apache.tika" name="tika-core" rev="1.10" />
+		<dependency org="org.apache.tika" name="tika-core" rev="1.11" />
 		<dependency org="com.ibm.icu" name="icu4j" rev="55.1" />
 
 		<dependency org="xerces" name="xercesImpl" rev="2.9.1" />

Modified: nutch/trunk/src/plugin/parse-tika/ivy.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/ivy.xml?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/ivy.xml (original)
+++ nutch/trunk/src/plugin/parse-tika/ivy.xml Thu Nov 26 07:41:02 2015
@@ -36,7 +36,7 @@
   </publications>
 
   <dependencies>
-    <dependency org="org.apache.tika" name="tika-parsers" rev="1.10" conf="*->default">
+    <dependency org="org.apache.tika" name="tika-parsers" rev="1.11" conf="*->default">
      <exclude org="org.apache.tika" name="tika-core" />
      <exclude org="org.apache.httpcomponents" name="httpclient" />
      <exclude org="org.apache.httpcomponents" name="httpcore" />

Modified: nutch/trunk/src/plugin/parse-tika/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/plugin.xml?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/plugin.xml (original)
+++ nutch/trunk/src/plugin/parse-tika/plugin.xml Thu Nov 26 07:41:02 2015
@@ -27,7 +27,7 @@
       </library>
       <library name="apache-mime4j-core-0.7.2.jar"/>
       <library name="apache-mime4j-dom-0.7.2.jar"/>
-      <library name="asm-debug-all-4.1.jar"/>
+      <library name="asm-5.0.4.jar"/>
       <library name="aspectjrt-1.8.0.jar"/>
       <library name="bcmail-jdk15on-1.52.jar"/>
       <library name="bcpkix-jdk15on-1.52.jar"/>
@@ -37,7 +37,7 @@
       <library name="c3p0-0.9.1.1.jar"/>
       <library name="cdm-4.5.5.jar"/>
       <library name="commons-codec-1.6.jar"/>
-      <library name="commons-compress-1.9.jar"/>
+      <library name="commons-compress-1.10.jar"/>
       <library name="commons-csv-1.0.jar"/>
       <library name="commons-exec-1.3.jar"/>
       <library name="commons-io-2.4.jar"/>
@@ -45,17 +45,23 @@
       <library name="commons-logging-1.1.3.jar"/>
       <library name="commons-logging-api-1.1.jar"/>
       <library name="commons-vfs2-2.0.jar"/>
+      <library name="cxf-core-3.0.3.jar"/>
+      <library name="cxf-rt-frontend-jaxrs-3.0.3.jar"/>
+      <library name="cxf-rt-rs-client-3.0.3.jar"/>
+      <library name="cxf-rt-transports-http-3.0.3.jar"/>
       <library name="ehcache-core-2.6.2.jar"/>
       <library name="fontbox-1.8.10.jar"/>
       <library name="geoapi-3.0.0.jar"/>
       <library name="grib-4.5.5.jar"/>
-      <library name="guava-11.0.2.jar"/>
+      <library name="guava-17.0.jar"/>
       <library name="httpmime-4.2.6.jar"/>
       <library name="httpservices-4.5.5.jar"/>
       <library name="isoparser-1.0.2.jar"/>
       <library name="jackcess-2.1.2.jar"/>
-      <library name="jackcess-encrypt-2.1.0.jar"/>
+      <library name="jackcess-encrypt-2.1.1.jar"/>
       <library name="java-libpst-0.8.1.jar"/>
+      <library name="javax.annotation-api-1.2.jar"/>
+      <library name="javax.ws.rs-api-2.0.1.jar"/>
       <library name="jcip-annotations-1.0.jar"/>
       <library name="jcommander-1.35.jar"/>
       <library name="jdom-1.0.jar"/>
@@ -66,10 +72,10 @@
       <library name="jmatio-1.0.jar"/>
       <library name="jna-4.1.0.jar"/>
       <library name="joda-time-2.2.jar"/>
+      <library name="json-20140107.jar"/>
       <library name="json-simple-1.1.1.jar"/>
       <library name="jsoup-1.7.2.jar"/>
       <library name="jsr-275-0.9.3.jar"/>
-      <library name="jsr305-1.3.9.jar"/>
       <library name="juniversalchardet-1.0.3.jar"/>
       <library name="junrar-0.7.jar"/>
       <library name="jwnl-1.3.3.jar"/>
@@ -82,10 +88,10 @@
       <library name="opennlp-tools-1.5.3.jar"/>
       <library name="pdfbox-1.8.10.jar"/>
       <library name="plexus-utils-1.5.6.jar"/>
-      <library name="poi-3.13-beta1.jar"/>
-      <library name="poi-ooxml-3.13-beta1.jar"/>
-      <library name="poi-ooxml-schemas-3.13-beta1.jar"/>
-      <library name="poi-scratchpad-3.13-beta1.jar"/>
+      <library name="poi-3.13.jar"/>
+      <library name="poi-ooxml-3.13.jar"/>
+      <library name="poi-ooxml-schemas-3.13.jar"/>
+      <library name="poi-scratchpad-3.13.jar"/>
       <library name="protobuf-java-2.5.0.jar"/>
       <library name="quartz-2.2.0.jar"/>
       <library name="regexp-1.3.jar"/>
@@ -96,12 +102,15 @@
       <library name="sis-storage-0.5.jar"/>
       <library name="sis-utility-0.5.jar"/>
       <library name="slf4j-api-1.7.12.jar"/>
+      <library name="stax2-api-3.1.4.jar"/>
       <library name="tagsoup-1.2.1.jar"/>
-      <library name="tika-parsers-1.10.jar"/>
+      <library name="tika-parsers-1.11.jar"/>
       <library name="udunits-4.5.5.jar"/>
       <library name="vorbis-java-core-0.6.jar"/>
       <library name="vorbis-java-tika-0.6.jar"/>
+      <library name="woodstox-core-asl-4.4.1.jar"/>
       <library name="xmlbeans-2.6.0.jar"/>
+      <library name="xmlschema-core-2.1.0.jar"/>
       <library name="xmpcore-5.1.2.jar"/>
       <library name="xz-1.5.jar"/>
    </runtime>

Modified: nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp (original)
+++ nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp Thu Nov 26 07:41:02 2015
@@ -21,7 +21,7 @@ String basePath = request.getScheme()+":
 %>
 
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html xmlns="http://www.w3.org/1999/xhtml">
+<html>
   <head>
     <base href="<%=basePath%>">
     

Modified: nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp (original)
+++ nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp Thu Nov 26 07:41:02 2015
@@ -24,7 +24,7 @@ String basePath = request.getScheme()+":
 
 
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html xmlns="http://www.w3.org/1999/xhtml">
+<html>
   <head>
     <base href="<%=basePath%>">
     

Modified: nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp (original)
+++ nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp Thu Nov 26 07:41:02 2015
@@ -21,7 +21,7 @@ String basePath = request.getScheme()+":
 %>
 
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html xmlns="http://www.w3.org/1999/xhtml">
+<html>
   <head>
     <base href="<%=basePath%>">
     

Modified: nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp (original)
+++ nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp Thu Nov 26 07:41:02 2015
@@ -21,7 +21,7 @@ String basePath = request.getScheme()+":
 %>
 
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html xmlns="http://www.w3.org/1999/xhtml">
+<html>
   <head>
     <base href="<%=basePath%>">
     

Modified: nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1716573&r1=1716572&r2=1716573&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java (original)
+++ nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java Thu Nov 26 07:41:02 2015
@@ -133,7 +133,7 @@ public class TestProtocolHttp {
     if (page.compareTo("/nonexists.html") != 0
         && page.compareTo("/brokenpage.jsp") != 0
         && page.compareTo("/redirection") != 0) {
-      assertEquals("ContentType " + url, "application/xhtml+xml",
+      assertEquals("ContentType " + url, "text/html",
           content.getContentType());
     }
   }



Mime
View raw message