creadur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pottlin...@apache.org
Subject svn commit: r1714849 - in /creadur/tentacles/trunk: RELEASE_NOTES.txt pom.xml src/main/java/org/apache/creadur/tentacles/IOSystem.java src/main/java/org/apache/creadur/tentacles/NexusClient.java
Date Tue, 17 Nov 2015 18:48:57 GMT
Author: pottlinger
Date: Tue Nov 17 18:48:57 2015
New Revision: 1714849

URL: http://svn.apache.org/viewvc?rev=1714849&view=rev
Log:
TENTACLES-9: Add patch to have a retry strategy during crawl phase (patch)


Modified:
    creadur/tentacles/trunk/RELEASE_NOTES.txt
    creadur/tentacles/trunk/pom.xml
    creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java
    creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java

Modified: creadur/tentacles/trunk/RELEASE_NOTES.txt
URL: http://svn.apache.org/viewvc/creadur/tentacles/trunk/RELEASE_NOTES.txt?rev=1714849&r1=1714848&r2=1714849&view=diff
==============================================================================
--- creadur/tentacles/trunk/RELEASE_NOTES.txt (original)
+++ creadur/tentacles/trunk/RELEASE_NOTES.txt Tue Nov 17 18:48:57 2015
@@ -11,3 +11,4 @@ Tentacles 0.1 SNAPSHOT
     * [TENTACLES-3] - provide help text if runtime parameters are missing
     * [TENTACLES-2] - use proper escaping in Velocity template files.
     * [TENTACLES-1] - allow filtering of directories in LicenseFilter
+    * [TENTACLES-9] - adding retry during crawl (thanks to Andy Gumbrecht)

Modified: creadur/tentacles/trunk/pom.xml
URL: http://svn.apache.org/viewvc/creadur/tentacles/trunk/pom.xml?rev=1714849&r1=1714848&r2=1714849&view=diff
==============================================================================
--- creadur/tentacles/trunk/pom.xml (original)
+++ creadur/tentacles/trunk/pom.xml Tue Nov 17 18:48:57 2015
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache</groupId>
     <artifactId>apache</artifactId>
-    <version>14</version>
+    <version>17</version>
   </parent>
   <groupId>org.apache.creadur.tentacles</groupId>
   <artifactId>apache-tentacles</artifactId>
@@ -74,7 +74,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <javaVersion>1.6</javaVersion>
-    <httpClientVersion>4.3.5</httpClientVersion>
+    <httpClientVersion>4.3.6</httpClientVersion>
     <apacheRatVersion>0.11</apacheRatVersion>
   </properties>
   <issueManagement>

Modified: creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java
URL: http://svn.apache.org/viewvc/creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java?rev=1714849&r1=1714848&r2=1714849&view=diff
==============================================================================
--- creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java (original)
+++ creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java Tue Nov 17 18:48:57 2015
@@ -16,26 +16,12 @@
  */
 package org.apache.creadur.tentacles;
 
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.BufferedWriter;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.Closeable;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.Flushable;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
+import org.apache.log4j.Logger;
+
+import java.io.*;
 import java.net.URL;
 import java.util.zip.ZipInputStream;
 
-import org.apache.log4j.Logger;
-
 /**
  * @version $Rev$ $Date$
  */
@@ -117,12 +103,12 @@ public class IOSystem {
                 ((Flushable) closeable).flush();
             }
         } catch (final IOException e) {
-        	LOG.error("Error when trying to flush before closing " + closeable, e);
+        	LOG.trace("Error when trying to flush before closing " + closeable, e);
         }
         try {
             closeable.close();
         } catch (final IOException e) {
-        	LOG.error("Error when trying to close " + closeable, e);
+        	LOG.trace("Error when trying to close " + closeable, e);
         }
     }
 

Modified: creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java
URL: http://svn.apache.org/viewvc/creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java?rev=1714849&r1=1714848&r2=1714849&view=diff
==============================================================================
--- creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java (original)
+++ creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java Tue Nov 17 18:48:57 2015
@@ -16,152 +16,171 @@
  */
 package org.apache.creadur.tentacles;
 
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.util.LinkedHashSet;
-import java.util.Set;
-
 import org.apache.http.Header;
 import org.apache.http.HttpHeaders;
 import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpHead;
+import org.apache.http.client.methods.HttpUriRequest;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClientBuilder;
 import org.apache.log4j.Logger;
 import org.codehaus.swizzle.stream.StreamLexer;
 
-public class NexusClient {
-
-	private static final Logger log = Logger.getLogger(NexusClient.class);
-	private static final String SLASH = "/";
-	private static final String ONE_UP = "../";
-	private static final String USER_AGENT_CONTENTS = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13";
-
-	private final CloseableHttpClient client;
-	private final FileSystem fileSystem;
-	private final IOSystem ioSystem;
-
-	public NexusClient(final Platform platform) {
-
-		System.setProperty("http.keepAlive", "false");
-		System.setProperty("http.maxConnections", "50");
-
-		this.client = HttpClientBuilder.create().disableContentCompression()
-				.build();
-		this.fileSystem = platform.getFileSystem();
-		this.ioSystem = platform.getIoSystem();
-	}
-
-	public File download(final URI uri, final File file) throws IOException {
-		if (file.exists()) {
-
-			final long length = getContentLength(uri);
-
-			if (file.length() == length) {
-				log.info("Exists " + uri);
-				return file;
-			} else {
-				log.info("Incomplete " + uri);
-			}
-		}
-
-		log.info("Download " + uri);
-
-		final CloseableHttpResponse response = get(uri);
-
-		InputStream content = null;
-		try {
-			content = response.getEntity().getContent();
-
-			this.fileSystem.mkparent(file);
-
-			this.ioSystem.copy(content, file);
-		} finally {
-			if (content != null) {
-				content.close();
-			}
-
-			response.close();
-		}
-
-		return file;
-	}
-
-	private Long getContentLength(final URI uri) throws IOException {
-		final CloseableHttpResponse head = head(uri);
-		final Header[] headers = head.getHeaders(HttpHeaders.CONTENT_LENGTH);
-
-		if (headers != null && headers.length >= 1) {
-			return Long.valueOf(headers[0].getValue());
-		}
-
-		head.close();
-
-		return Long.valueOf(-1);
-	}
-
-	private CloseableHttpResponse get(final URI uri) throws IOException {
-		final HttpGet request = new HttpGet(uri);
-		request.setHeader(HttpHeaders.USER_AGENT, USER_AGENT_CONTENTS);
-		return this.client.execute(request);
-	}
-
-	private CloseableHttpResponse head(final URI uri) throws IOException {
-		final HttpHead request = new HttpHead(uri);
-		request.setHeader(HttpHeaders.USER_AGENT, USER_AGENT_CONTENTS);
-		return this.client.execute(request);
-	}
-
-	public Set<URI> crawl(final URI index) throws IOException {
-		log.info("Crawl " + index);
-		final Set<URI> resources = new LinkedHashSet<URI>();
-
-		final CloseableHttpResponse response = get(index);
-
-		final InputStream content = response.getEntity().getContent();
-		final StreamLexer lexer = new StreamLexer(content);
-
-		final Set<URI> crawl = new LinkedHashSet<URI>();
-
-		// <a
-		// href="https://repository.apache.org/content/repositories/orgapacheopenejb-094/archetype-catalog.xml">archetype-catalog.xml</a>
-		while (lexer.readAndMark("<a ", "/a>")) {
-
-			try {
-				final String link = lexer.peek("href=\"", "\"");
-				final String name = lexer.peek(">", "<");
-
-				final URI uri = index.resolve(link);
-
-				if (name.equals(ONE_UP)) {
-					continue;
-				}
-				if (link.equals(ONE_UP)) {
-					continue;
-				}
-
-				if (name.endsWith(SLASH)) {
-					crawl.add(uri);
-					continue;
-				}
-
-				resources.add(uri);
-
-			} finally {
-				lexer.unmark();
-			}
-		}
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.util.LinkedHashSet;
+import java.util.Set;
 
-		content.close();
-		response.close();
+public class NexusClient {
 
-		for (final URI uri : crawl) {
-			resources.addAll(crawl(uri));
-		}
+    private static final Logger log = Logger.getLogger(NexusClient.class);
+    private static final String SLASH = "/";
+    private static final String ONE_UP = "../";
+    private static final String USER_AGENT_CONTENTS = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13";
+
+    private final CloseableHttpClient client;
+    private final FileSystem fileSystem;
+    private final IOSystem ioSystem;
+    private final int retries;
+
+    public NexusClient(final Platform platform) {
+
+        System.setProperty("http.keepAlive", "false");
+        System.setProperty("http.maxConnections", "50");
+
+        this.retries = Integer.parseInt(System.getProperty("NexusClient.retries", "5"));
+
+        this.client = HttpClientBuilder.create().disableContentCompression()
+                .build();
+        this.fileSystem = platform.getFileSystem();
+        this.ioSystem = platform.getIoSystem();
+    }
+
+    public File download(final URI uri, final File file) throws IOException {
+        if (file.exists()) {
+
+            final long length = getContentLength(uri);
+
+            if (file.length() == length) {
+                log.info("Exists " + uri);
+                return file;
+            } else {
+                log.info("Incomplete " + uri);
+            }
+        }
+
+        log.info("Download " + uri);
+
+        final CloseableHttpResponse response = get(uri);
+
+        InputStream content = null;
+        try {
+            content = response.getEntity().getContent();
+
+            this.fileSystem.mkparent(file);
+
+            this.ioSystem.copy(content, file);
+        } finally {
+            if (content != null) {
+                content.close();
+            }
+
+            response.close();
+        }
+
+        return file;
+    }
+
+    private Long getContentLength(final URI uri) throws IOException {
+        final CloseableHttpResponse head = head(uri);
+        final Header[] headers = head.getHeaders(HttpHeaders.CONTENT_LENGTH);
+
+        if (headers != null && headers.length >= 1) {
+            return Long.valueOf(headers[0].getValue());
+        }
+
+        head.close();
+
+        return (long) -1;
+    }
+
+    private CloseableHttpResponse get(final URI uri) throws IOException {
+        return get(new HttpGet(uri), this.retries);
+    }
+
+    private CloseableHttpResponse head(final URI uri) throws IOException {
+        return get(new HttpHead(uri), this.retries);
+    }
+
+    private CloseableHttpResponse get(final HttpUriRequest request, int tries) throws IOException {
+        try {
+            request.setHeader(HttpHeaders.USER_AGENT, USER_AGENT_CONTENTS);
+            return this.client.execute(request);
+        } catch (final IOException e) {
+            if (tries > 0) {
+                try {
+                    Thread.sleep(250);
+                } catch (final InterruptedException ie) {
+                    Thread.interrupted();
+                    throw new IOException("Interrupted", ie);
+                }
+                return get(request, tries--);
+            } else {
+                throw e;
+            }
+        }
+    }
+
+    public Set<URI> crawl(final URI index) throws IOException {
+        log.info("Crawl " + index);
+        final Set<URI> resources = new LinkedHashSet<URI>();
+
+        final CloseableHttpResponse response = get(index);
+
+        final InputStream content = response.getEntity().getContent();
+        final StreamLexer lexer = new StreamLexer(content);
+
+        final Set<URI> crawl = new LinkedHashSet<URI>();
+
+        // <a
+        // href="https://repository.apache.org/content/repositories/orgapacheopenejb-094/archetype-catalog.xml">archetype-catalog.xml</a>
+        while (lexer.readAndMark("<a ", "/a>")) {
+
+            try {
+                final String link = lexer.peek("href=\"", "\"");
+                final String name = lexer.peek(">", "<");
+
+                final URI uri = index.resolve(link);
+
+                if (name.equals(ONE_UP)) {
+                    continue;
+                }
+                if (link.equals(ONE_UP)) {
+                    continue;
+                }
+
+                if (name.endsWith(SLASH)) {
+                    crawl.add(uri);
+                    continue;
+                }
+
+                resources.add(uri);
+
+            } finally {
+                lexer.unmark();
+            }
+        }
+
+        content.close();
+        response.close();
+
+        for (final URI uri : crawl) {
+            resources.addAll(crawl(uri));
+        }
 
-		return resources;
-	}
+        return resources;
+    }
 }
\ No newline at end of file



Mime
View raw message