manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1655916 - in /manifoldcf/branches/dev_1x: ./ CHANGES.txt connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Date Fri, 30 Jan 2015 02:29:03 GMT
Author: kwright
Date: Fri Jan 30 02:29:02 2015
New Revision: 1655916

URL: http://svn.apache.org/r1655916
Log:
Pull up second fix for CONNECTORS-1154 from trunk.

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/CHANGES.txt
    manifoldcf/branches/dev_1x/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Jan 30 02:29:02 2015
@@ -121,4 +121,4 @@
 /manifoldcf/branches/CONNECTORS-981:1605049-1605773
 /manifoldcf/branches/CONNECTORS-989:1611600-1612101
 /manifoldcf/branches/CONNECTORS-990:1610284-1610707
-/manifoldcf/trunk:1620703,1620748,1620812,1620862,1621449,1621613,1621855,1622213,1622740,1622850,1622853-1622854,1623249,1623251,1623314,1623599,1623951,1623953-1623954,1623956,1623972,1624058,1624085,1624174,1624236,1624377,1624384,1624399,1624449,1624464,1624504,1624729-1624731,1624906,1624909-1624910,1624982,1625023,1625095,1625103,1625108,1625264,1625270,1625394,1625400,1625910,1626090,1626097,1626102,1626638-1626639,1626973,1627687,1627690,1627959,1628046,1628066,1628106,1628168,1628188,1628699,1628798,1628808,1628845,1628905,1629122,1629374-1629375,1629379,1629541,1629994,1630188,1630535,1630623,1630671,1630812,1630885,1631039,1631162,1631164,1631252,1631750,1631953,1632013,1632225,1632289,1632562,1632844,1632847,1632854,1633062-1633063,1633108,1633193,1633202,1633282,1633284,1633295,1633336,1633339,1633345,1633348,1633364,1633378,1633383,1633432,1633546,1633590,1633634,1633668,1633727,1633760,1633764,1633786,1633910,1633923,1634021,1634028,1634067,1634132,1634145,1634148,163
 4155,1634188,1634202,1634264,1634373,1634530,1634688,1634850,1634857,1635103,1635116,1635421,1635438,1635478,1635481,1635484,1635490,1635809,1635939,1636146,1636167,1636180,1636207,1636215,1636232,1636334,1636519,1636570,1636684,1636940,1637011,1637310,1637350,1637364,1637373,1637378,1639259,1639593,1639600,1640018,1640101,1640199,1640204,1640208,1640314,1640319,1640749,1640772,1640805,1640888,1640925,1640941-1640942,1641222,1641328,1641557,1641559,1641629,1641633,1641724,1641754,1641911,1642163,1642255,1642318,1642531,1642650,1642658,1642673,1642716,1644197,1644399,1644538,1644920,1644931,1646317,1646397,1646403,1646408,1646640,1646947,1647574,1647585,1647608,1648686,1648976,1649201,1649203,1649529,1649605,1649628,1649794,1650351,1650722,1650741-1650742,1650745,1650747,1650911,1650954,1651332,1651539,1651907,1651921,1652071,1652974,1653175,1653899,1654651,1655205,1655261,1655264,1655377,1655411,1655618
+/manifoldcf/trunk:1620703,1620748,1620812,1620862,1621449,1621613,1621855,1622213,1622740,1622850,1622853-1622854,1623249,1623251,1623314,1623599,1623951,1623953-1623954,1623956,1623972,1624058,1624085,1624174,1624236,1624377,1624384,1624399,1624449,1624464,1624504,1624729-1624731,1624906,1624909-1624910,1624982,1625023,1625095,1625103,1625108,1625264,1625270,1625394,1625400,1625910,1626090,1626097,1626102,1626638-1626639,1626973,1627687,1627690,1627959,1628046,1628066,1628106,1628168,1628188,1628699,1628798,1628808,1628845,1628905,1629122,1629374-1629375,1629379,1629541,1629994,1630188,1630535,1630623,1630671,1630812,1630885,1631039,1631162,1631164,1631252,1631750,1631953,1632013,1632225,1632289,1632562,1632844,1632847,1632854,1633062-1633063,1633108,1633193,1633202,1633282,1633284,1633295,1633336,1633339,1633345,1633348,1633364,1633378,1633383,1633432,1633546,1633590,1633634,1633668,1633727,1633760,1633764,1633786,1633910,1633923,1634021,1634028,1634067,1634132,1634145,1634148,163
 4155,1634188,1634202,1634264,1634373,1634530,1634688,1634850,1634857,1635103,1635116,1635421,1635438,1635478,1635481,1635484,1635490,1635809,1635939,1636146,1636167,1636180,1636207,1636215,1636232,1636334,1636519,1636570,1636684,1636940,1637011,1637310,1637350,1637364,1637373,1637378,1639259,1639593,1639600,1640018,1640101,1640199,1640204,1640208,1640314,1640319,1640749,1640772,1640805,1640888,1640925,1640941-1640942,1641222,1641328,1641557,1641559,1641629,1641633,1641724,1641754,1641911,1642163,1642255,1642318,1642531,1642650,1642658,1642673,1642716,1644197,1644399,1644538,1644920,1644931,1646317,1646397,1646403,1646408,1646640,1646947,1647574,1647585,1647608,1648686,1648976,1649201,1649203,1649529,1649605,1649628,1649794,1650351,1650722,1650741-1650742,1650745,1650747,1650911,1650954,1651332,1651539,1651907,1651921,1652071,1652974,1653175,1653899,1654651,1655205,1655261,1655264,1655377,1655411,1655618,1655914

Modified: manifoldcf/branches/dev_1x/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/CHANGES.txt?rev=1655916&r1=1655915&r2=1655916&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/CHANGES.txt (original)
+++ manifoldcf/branches/dev_1x/CHANGES.txt Fri Jan 30 02:29:02 2015
@@ -3,6 +3,11 @@ $Id$
 
 ======================= 1.9-dev =====================
 
+CONNECTORS-1154: Changes to the flow through the web connector's
+deflate/unzip logic, designed to prevent gzip headers from being read
+if the stream's contents are not read.
+(Li Minhui, Karl Wright)
+
 CONNECTORS-1153: Broken authority name comparison is preventing
 incremental when no authority is used.
 (Aeham Abushwashi, Karl Wright)

Modified: manifoldcf/branches/dev_1x/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1655916&r1=1655915&r2=1655916&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
(original)
+++ manifoldcf/branches/dev_1x/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Fri Jan 30 02:29:02 2015
@@ -903,7 +903,12 @@ public class ThrottledFetcher
         throw new ManifoldCFException("Attempt to get an input stream when no method thread");
       try
       {
-        return methodThread.getSafeInputStream();
+        InputStream bodyStream = methodThread.getSafeInputStream();
+        if (methodThread.isGZipStream())
+          bodyStream = new GZIPInputStream(bodyStream);
+        else if (methodThread.isDeflateStream())
+          bodyStream = new DeflateInputStream(bodyStream);
+        return bodyStream;
       }
       catch (InterruptedException e)
       {
@@ -1373,6 +1378,8 @@ public class ThrottledFetcher
     protected boolean streamCreated = false;
     protected Throwable streamException = null;
     protected boolean abortThread = false;
+    protected boolean gzip = false;
+    protected boolean deflate = false;
 
     protected Throwable shutdownException = null;
 
@@ -1457,8 +1464,6 @@ public class ThrottledFetcher
               {
                 try
                 {
-                  boolean gzip = false;
-                  boolean deflate = false;
                   Header ceheader = response.getEntity().getContentEncoding();
                   if (ceheader != null)
                   {
@@ -1483,10 +1488,6 @@ public class ThrottledFetcher
                   if (bodyStream != null)
                   {
                     bodyStream = new ThrottledInputstream(fetchThrottler.createFetchStream(),theConnection,bodyStream);
-                    if (gzip)
-                      bodyStream = new GZIPInputStream(bodyStream);
-                    else if (deflate)
-                      bodyStream = new DeflateInputStream(bodyStream);
                     threadStream = new XThreadInputStream(bodyStream);
                   }
                   streamCreated = true;
@@ -1644,6 +1645,46 @@ public class ThrottledFetcher
         }
       }
     }
+
+    public boolean isGZipStream()
+      throws InterruptedException, IOException, HttpException
+    {
+      // Must wait until stream is created, or until we note an exception was thrown.
+      while (true)
+      {
+        synchronized (this)
+        {
+          if (responseException != null)
+            throw new IllegalStateException("Check for response before getting stream");
+          if (cookieException != null)
+            throw new IllegalStateException("Check for cookies before getting stream");
+          checkException(streamException);
+          if (streamCreated)
+            return gzip;
+          wait();
+        }
+      }
+    }    
+
+    public boolean isDeflateStream()
+      throws InterruptedException, IOException, HttpException
+    {
+      // Must wait until stream is created, or until we note an exception was thrown.
+      while (true)
+      {
+        synchronized (this)
+        {
+          if (responseException != null)
+            throw new IllegalStateException("Check for response before getting stream");
+          if (cookieException != null)
+            throw new IllegalStateException("Check for cookies before getting stream");
+          checkException(streamException);
+          if (streamCreated)
+            return deflate;
+          wait();
+        }
+      }
+    }    
     
     public InputStream getSafeInputStream()
       throws InterruptedException, IOException, HttpException



Mime
View raw message