Return-Path: X-Original-To: apmail-manifoldcf-commits-archive@www.apache.org Delivered-To: apmail-manifoldcf-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id BA9B610B12 for ; Fri, 26 Apr 2013 10:48:47 +0000 (UTC) Received: (qmail 9394 invoked by uid 500); 26 Apr 2013 10:48:47 -0000 Delivered-To: apmail-manifoldcf-commits-archive@manifoldcf.apache.org Received: (qmail 9241 invoked by uid 500); 26 Apr 2013 10:48:44 -0000 Mailing-List: contact commits-help@manifoldcf.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@manifoldcf.apache.org Delivered-To: mailing list commits@manifoldcf.apache.org Received: (qmail 9208 invoked by uid 99); 26 Apr 2013 10:48:43 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 26 Apr 2013 10:48:43 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 26 Apr 2013 10:48:39 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 832AB23888E7; Fri, 26 Apr 2013 10:48:18 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r1476129 - in /manifoldcf/trunk: ./ connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ Date: Fri, 26 Apr 2013 10:48:18 -0000 To: commits@manifoldcf.apache.org From: kwright@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130426104818.832AB23888E7@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: kwright Date: Fri Apr 26 10:48:17 2013 New Revision: 1476129 URL: 
http://svn.apache.org/r1476129 Log: Real fix for CONNECTORS-679. Modified: manifoldcf/trunk/CHANGES.txt manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Modified: manifoldcf/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1476129&r1=1476128&r2=1476129&view=diff ============================================================================== --- manifoldcf/trunk/CHANGES.txt (original) +++ manifoldcf/trunk/CHANGES.txt Fri Apr 26 10:48:17 2013 @@ -3,9 +3,10 @@ $Id$ ======================= 1.2-dev ===================== -CONNECTORS-679: Web connector hangs on some JVMs due to faulty -optimizations. Added volatile qualifiers for some of the key variables -that seem to be involved in this, and a test. +CONNECTORS-679: Web connector hangs during throttling. Reason +appears to be that it is possible to interrupt the beginRead() method +after it goes into "obtain estimate" mode. Added code to make it clean +up in that case. Also applied to RSS connector. (Erlend Garåsen, Karl Wright) CONNECTORS-677: Close body streams where required. 
Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java?rev=1476129&r1=1476128&r2=1476129&view=diff ============================================================================== --- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java (original) +++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java Fri Apr 26 10:48:17 2013 @@ -1123,34 +1123,51 @@ public class ThrottledFetcher } } - long waitTime = 0L; - synchronized (this) + // It is possible for the following code to get interrupted. If that happens, + // we have to unstick the threads that are waiting on the estimate! + boolean finished = false; + try { - // Add these bytes to the estimated total - totalBytesRead += (long)byteCount; + long waitTime = 0L; + synchronized (this) + { + // Add these bytes to the estimated total + totalBytesRead += (long)byteCount; - // Estimate the time this read will take, and wait accordingly - long estimatedTime = (long)(rateEstimate * (double)byteCount); + // Estimate the time this read will take, and wait accordingly + long estimatedTime = (long)(rateEstimate * (double)byteCount); - // Figure out how long the total byte count should take, to meet the constraint - long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer); + // Figure out how long the total byte count should take, to meet the constraint + long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer); - // The wait time is the different between our desired end time, minus the estimated time to read the data, and the - // current time. 
But it can't be negative. - waitTime = (desiredEndTime - estimatedTime) - currentTime; - } + // The wait time is the different between our desired end time, minus the estimated time to read the data, and the + // current time. But it can't be negative. + waitTime = (desiredEndTime - estimatedTime) - currentTime; + } + + if (waitTime > 0L) + { + if (Logging.connectors.isDebugEnabled()) + Logging.connectors.debug("RSS: Performing a read wait on server '"+serverName+"' of "+ + new Long(waitTime).toString()+" ms."); + ManifoldCF.sleep(waitTime); + } - if (waitTime > 0L) + //if (Logging.connectors.isTraceEnabled()) + // Logging.connectors.trace("RSS: Begin read noted for '"+serverName+"'"); + finished = true; + } + finally { - if (Logging.connectors.isDebugEnabled()) - Logging.connectors.debug("RSS: Performing a read wait on server '"+serverName+"' of "+ - new Long(waitTime).toString()+" ms."); - ManifoldCF.sleep(waitTime); + if (!finished) + { + if (estimateInProgress) + { + estimateInProgress = false; + firstChunkLock.notifyAll(); + } + } } - - //if (Logging.connectors.isTraceEnabled()) - // Logging.connectors.trace("RSS: Begin read noted for '"+serverName+"'"); - } /** Note the end of an individual read from the server. Call this just after an individual read completes. 
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1476129&r1=1476128&r2=1476129&view=diff ============================================================================== --- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original) +++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Fri Apr 26 10:48:17 2013 @@ -832,31 +832,48 @@ public class ThrottledFetcher } } - long waitTime = 0L; - synchronized (this) + // It is possible for the following code to get interrupted. If that happens, + // we have to unstick the threads that are waiting on the estimate! 
+ boolean finished = false; + try { - // Add these bytes to the estimated total - totalBytesRead += (long)byteCount; + long waitTime = 0L; + synchronized (this) + { + // Add these bytes to the estimated total + totalBytesRead += (long)byteCount; - // Estimate the time this read will take, and wait accordingly - long estimatedTime = (long)(rateEstimate * (double)byteCount); + // Estimate the time this read will take, and wait accordingly + long estimatedTime = (long)(rateEstimate * (double)byteCount); - // Figure out how long the total byte count should take, to meet the constraint - long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer); + // Figure out how long the total byte count should take, to meet the constraint + long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer); - // The wait time is the different between our desired end time, minus the estimated time to read the data, and the - // current time. But it can't be negative. - waitTime = (desiredEndTime - estimatedTime) - currentTime; - } + // The wait time is the different between our desired end time, minus the estimated time to read the data, and the + // current time. But it can't be negative. 
+ waitTime = (desiredEndTime - estimatedTime) - currentTime; + } - if (waitTime > 0L) + if (waitTime > 0L) + { + if (Logging.connectors.isDebugEnabled()) + Logging.connectors.debug("WEB: Performing a read wait on bin '"+binName+"' of "+ + new Long(waitTime).toString()+" ms."); + ManifoldCF.sleep(waitTime); + } + finished = true; + } + finally { - if (Logging.connectors.isDebugEnabled()) - Logging.connectors.debug("WEB: Performing a read wait on bin '"+binName+"' of "+ - new Long(waitTime).toString()+" ms."); - ManifoldCF.sleep(waitTime); + if (!finished) + { + if (estimateInProgress) + { + estimateInProgress = false; + firstChunkLock.notifyAll(); + } + } } - } /** Note the end of an individual read from the server. Call this just after an individual read completes.