From issues-return-84626-archive-asf-public=cust-asf.ponee.io@nifi.apache.org Mon Sep 9 17:31:26 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id 26638180608 for ; Mon, 9 Sep 2019 19:31:26 +0200 (CEST) Received: (qmail 44415 invoked by uid 500); 9 Sep 2019 17:31:25 -0000 Mailing-List: contact issues-help@nifi.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@nifi.apache.org Delivered-To: mailing list issues@nifi.apache.org Received: (qmail 44401 invoked by uid 99); 9 Sep 2019 17:31:25 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 09 Sep 2019 17:31:25 +0000 From: GitBox To: issues@nifi.apache.org Subject: [GitHub] [nifi] markap14 commented on a change in pull request #3702: NIFI-6636: Fixed ListGCSBucket file duplication error Message-ID: <156805028555.14486.15561633700203199599.gitbox@gitbox.apache.org> Date: Mon, 09 Sep 2019 17:31:25 -0000 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit markap14 commented on a change in pull request #3702: NIFI-6636: Fixed ListGCSBucket file duplication error URL: https://github.com/apache/nifi/pull/3702#discussion_r322364290 ########## File path: nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/ListGCSBucket.java ########## @@ -381,40 +384,36 @@ public void onTrigger(ProcessContext context, ProcessSession session) throws Pro // Update state if (lastModified > maxTimestamp) { maxTimestamp = lastModified; - currentKeys.clear(); + maxKeys.clear(); } if (lastModified == maxTimestamp) { - currentKeys.add(blob.getName()); + maxKeys.add(blob.getName()); } - listCount++; + loadCount++; } - blobPages = blobPages.getNextPage(); - commit(context, session, listCount); - listCount = 0; - } while (blobPages != null); - - currentTimestamp = maxTimestamp; + commit(context, session, loadCount); - final long listMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); - getLogger().info("Successfully listed GCS bucket {} in {} millis", new Object[]{bucket, listMillis}); + blobPage = blobPage.getNextPage(); + } while (blobPage != null); - if (!commit(context, session, listCount)) { - if (currentTimestamp > 0) { - persistState(context); - } - getLogger().debug("No new objects in GCS bucket {} to list. Yielding.", new Object[]{bucket}); + if (maxTimestamp != 0) { + currentTimestamp = maxTimestamp; + currentKeys = maxKeys; + persistState(context); + } else { + getLogger().debug("No new objects in GCS bucket {} to load. Yielding.", new Object[]{bucket}); context.yield(); } + + final long listMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + getLogger().info("Successfully listed GCS bucket {} in {} millis", new Object[]{bucket, listMillis}); } - private boolean commit(final ProcessContext context, final ProcessSession session, int listCount) { - boolean willCommit = listCount > 0; - if (willCommit) { - getLogger().info("Successfully listed {} new files from GCS; routing to success", new Object[] {listCount}); + private void commit(final ProcessContext context, final ProcessSession session, int loadCount) { + if (loadCount > 0) { + getLogger().info("Successfully loaded {} new files from GCS; routing to success", new Object[] {loadCount}); Review comment: Thanks, looks good to me. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services