Return-Path: X-Original-To: apmail-manifoldcf-commits-archive@www.apache.org Delivered-To: apmail-manifoldcf-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 308BB109A0 for ; Tue, 1 Jul 2014 10:40:31 +0000 (UTC) Received: (qmail 70694 invoked by uid 500); 1 Jul 2014 10:40:30 -0000 Delivered-To: apmail-manifoldcf-commits-archive@manifoldcf.apache.org Received: (qmail 70643 invoked by uid 500); 1 Jul 2014 10:40:30 -0000 Mailing-List: contact commits-help@manifoldcf.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@manifoldcf.apache.org Delivered-To: mailing list commits@manifoldcf.apache.org Received: (qmail 70634 invoked by uid 99); 1 Jul 2014 10:40:30 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 01 Jul 2014 10:40:30 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 01 Jul 2014 10:40:30 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 56724238897A; Tue, 1 Jul 2014 10:40:05 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1607036 - in /manifoldcf/trunk: CHANGES.txt framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java Date: Tue, 01 Jul 2014 10:40:05 -0000 To: commits@manifoldcf.apache.org From: kwright@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140701104005.56724238897A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: kwright Date: Tue Jul 1 10:40:04 2014 New Revision: 1607036 URL: http://svn.apache.org/r1607036 Log: Fix for CONNECTORS-988 Modified: 
manifoldcf/trunk/CHANGES.txt manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java Modified: manifoldcf/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1607036&r1=1607035&r2=1607036&view=diff ============================================================================== --- manifoldcf/trunk/CHANGES.txt (original) +++ manifoldcf/trunk/CHANGES.txt Tue Jul 1 10:40:04 2014 @@ -3,6 +3,9 @@ $Id$ ======================= 1.7-dev ===================== +CONNECTORS-988: Performance improvements for split pipeline crawls. +(Karl Wright) + CONNECTORS-985: Get UI tests working again. (Karl Wright) Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java?rev=1607036&r1=1607035&r2=1607036&view=diff ============================================================================== --- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java (original) +++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java Tue Jul 1 10:40:04 2014 @@ -40,10 +40,10 @@ public class RepositoryDocumentFactory protected final RepositoryDocument original; // The binary stream file and stream (if any) - protected BinaryTracker binaryTracker; + protected BinaryInput binaryTracker; // Readers (organized by metadata) - protected Map<String,ReaderTracker[]> metadataReaders = new HashMap<String,ReaderTracker[]>(); + protected Map<String,CharacterInput[]> metadataReaders = new HashMap<String,CharacterInput[]>(); /** Constructor. * Pass a RepositoryDocument. 
This constructor reads all streams and stores them in @@ -59,7 +59,7 @@ public class RepositoryDocumentFactory try { - this.binaryTracker = new BinaryTracker(document.getBinaryStream()); + this.binaryTracker = new TempFileInput(document.getBinaryStream()); // Copy all reader streams Iterator iter = document.getFields(); if (iter.hasNext()) @@ -68,12 +68,12 @@ public class RepositoryDocumentFactory Object[] objects = document.getField(fieldName); if (objects instanceof Reader[]) { - ReaderTracker[] newValues = new ReaderTracker[objects.length]; + CharacterInput[] newValues = new CharacterInput[objects.length]; metadataReaders.put(fieldName,newValues); // Populate newValues for (int i = 0; i < newValues.length; i++) { - newValues[i] = new ReaderTracker((Reader)objects[i]); + newValues[i] = new TempFileCharacterInput((Reader)objects[i]); } } } @@ -82,14 +82,14 @@ public class RepositoryDocumentFactory { // Clean up everything we've done so far. if (this.binaryTracker != null) - this.binaryTracker.close(); + this.binaryTracker.discard(); for (String key : metadataReaders.keySet()) { - ReaderTracker[] rt = metadataReaders.get(key); - for (ReaderTracker r : rt) + CharacterInput[] rt = metadataReaders.get(key); + for (CharacterInput r : rt) { if (r != null) - r.close(); + r.discard(); } } if (e instanceof IOException) @@ -129,7 +129,8 @@ public class RepositoryDocumentFactory } // Copy binary - rd.setBinary(binaryTracker.createNewInputStream(),original.getBinaryLength()); + binaryTracker.doneWithStream(); + rd.setBinary(binaryTracker.getStream(),original.getBinaryLength()); // Copy metadata fields (including minting new Readers where needed) Iterator iter = original.getFields(); if (iter.hasNext()) @@ -138,11 +139,12 @@ public class RepositoryDocumentFactory Object[] objects = original.getField(fieldName); if (objects instanceof Reader[]) { - ReaderTracker[] rts = metadataReaders.get(fieldName); + CharacterInput[] rts = metadataReaders.get(fieldName); Reader[] newReaders 
= new Reader[rts.length]; for (int i = 0; i < rts.length; i++) { - newReaders[i] = rts[i].createNewReader(); + rts[i].doneWithStream(); + newReaders[i] = rts[i].getStream(); } rd.addField(fieldName,newReaders); } @@ -166,199 +168,15 @@ public class RepositoryDocumentFactory public void close() throws ManifoldCFException { - binaryTracker.close(); + binaryTracker.discard(); for (String key : metadataReaders.keySet()) { - ReaderTracker[] rt = metadataReaders.get(key); - for (ReaderTracker r : rt) + CharacterInput[] rt = metadataReaders.get(key); + for (CharacterInput r : rt) { - r.close(); + r.discard(); } } } - protected static class ReaderTracker - { - protected File readerFile; - protected Reader reader = null; - - public ReaderTracker(Reader r) - throws IOException - { - // Make a local copy - readerFile = File.createTempFile("mcfrdr","tmp"); - try - { - FileOutputStream os = new FileOutputStream(readerFile); - try - { - OutputStreamWriter ow = new OutputStreamWriter(os,"utf-8"); - try - { - char[] byteArray = new char[65536]; - while (true) - { - int amt = r.read(byteArray,0,byteArray.length); - if (amt == -1) - break; - ow.write(byteArray,0,amt); - } - } - finally - { - ow.flush(); - } - } - finally - { - os.close(); - } - } - catch (Throwable e) - { - readerFile.delete(); - if (e instanceof IOException) - throw (IOException)e; - else if (e instanceof RuntimeException) - throw (RuntimeException)e; - else if (e instanceof Error) - throw (Error)e; - else - throw new RuntimeException("Unknown error type: "+e.getClass().getName()+": "+e.getMessage(),e); - } - } - - public Reader createNewReader() - throws ManifoldCFException - { - try - { - // Close existing inputstream and create a new one. 
- if (reader != null) - { - reader.close(); - reader = null; - } - reader = new InputStreamReader(new FileInputStream(readerFile),"utf-8"); - return reader; - } - catch (IOException e) - { - handleIOException(e); - return null; - } - } - - public void close() - throws ManifoldCFException - { - try - { - // Close all streams and delete file - if (reader != null) - { - reader.close(); - reader = null; - } - readerFile.delete(); - } - catch (IOException e) - { - handleIOException(e); - } - } - } - - protected static class BinaryTracker - { - protected File binaryFile; - protected InputStream inputStream = null; - - public BinaryTracker(InputStream is) - throws IOException - { - // Make a local copy - binaryFile = File.createTempFile("mcfbin","tmp"); - try - { - FileOutputStream os = new FileOutputStream(binaryFile); - try - { - byte[] byteArray = new byte[65536]; - while (true) - { - int amt = is.read(byteArray,0,byteArray.length); - if (amt == -1) - break; - os.write(byteArray,0,amt); - } - } - finally - { - os.close(); - } - } - catch (Throwable e) - { - binaryFile.delete(); - if (e instanceof IOException) - throw (IOException)e; - else if (e instanceof RuntimeException) - throw (RuntimeException)e; - else if (e instanceof Error) - throw (Error)e; - else - throw new RuntimeException("Unknown error type: "+e.getClass().getName()+": "+e.getMessage(),e); - } - } - - public InputStream createNewInputStream() - throws ManifoldCFException - { - try - { - // Close existing inputstream and create a new one. 
- if (inputStream != null) - { - inputStream.close(); - inputStream = null; - } - inputStream = new FileInputStream(binaryFile); - return inputStream; - } - catch (IOException e) - { - handleIOException(e); - return null; - } - } - - public void close() - throws ManifoldCFException - { - try - { - // Close all streams and delete file - if (inputStream != null) - { - inputStream.close(); - inputStream = null; - } - binaryFile.delete(); - } - catch (IOException e) - { - handleIOException(e); - } - } - } - - protected static void handleIOException(IOException e) - throws ManifoldCFException - { - if (e instanceof InterruptedIOException) - throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED); - throw new ManifoldCFException(e.getMessage(),e); - } - }