From commits-return-91198-archive-asf-public=cust-asf.ponee.io@hbase.apache.org Tue Jan 14 00:03:49 2020 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id A5E0918064E for ; Tue, 14 Jan 2020 01:03:48 +0100 (CET) Received: (qmail 50958 invoked by uid 500); 14 Jan 2020 00:03:47 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 50882 invoked by uid 99); 14 Jan 2020 00:03:47 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 14 Jan 2020 00:03:47 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 5EA2581F12; Tue, 14 Jan 2020 00:03:47 +0000 (UTC) Date: Tue, 14 Jan 2020 00:03:49 +0000 To: "commits@hbase.apache.org" Subject: [hbase] branch master updated: HBASE-23679 FileSystem objects leak when cleaned up in cleanupBulkLoad MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <157896022653.8002.7919814875247662689@gitbox.apache.org> From: elserj@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: hbase X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: 0bf933b0686c78601f95c580ff7436918c4efcfe X-Git-Newrev: 1d2c3efc690f6c408a99a20942b8fa81c77fb703 X-Git-Rev: 1d2c3efc690f6c408a99a20942b8fa81c77fb703 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. elserj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hbase.git The following commit(s) were added to refs/heads/master by this push: new 1d2c3ef HBASE-23679 FileSystem objects leak when cleaned up in cleanupBulkLoad 1d2c3ef is described below commit 1d2c3efc690f6c408a99a20942b8fa81c77fb703 Author: Josh Elser AuthorDate: Mon Jan 13 18:30:30 2020 -0500 HBASE-23679 FileSystem objects leak when cleaned up in cleanupBulkLoad The cleanupBulkLoad method is only called for the first Region in the table which was being bulk loaded into. This means that potentially N-1 other RegionServers (where N is the number of RegionServers) will leak one FileSystem object into the FileSystem cache which will never be cleaned up. We need to do this clean-up as a part of secureBulkLoadHFiles otherwise we cannot guarantee that heap usage won't grow unbounded. Closes #1029 Signed-off-by: Sean Busbey --- .../hbase/regionserver/SecureBulkLoadManager.java | 37 ++++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java index bccc8fe..599c898 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java @@ -153,26 +153,15 @@ public class SecureBulkLoadManager { public void cleanupBulkLoad(final HRegion region, final CleanupBulkLoadRequest request) throws IOException { - try { - region.getCoprocessorHost().preCleanupBulkLoad(getActiveUser()); + region.getCoprocessorHost().preCleanupBulkLoad(getActiveUser()); - Path path = new Path(request.getBulkToken()); - if (!fs.delete(path, true)) { - if (fs.exists(path)) { - throw new IOException("Failed to clean up " + path); - } - } - LOG.info("Cleaned up " + path + " successfully."); - } finally { - UserGroupInformation ugi = getActiveUser().getUGI(); - try { - if (!UserGroupInformation.getLoginUser().equals(ugi) && !isUserReferenced(ugi)) { - FileSystem.closeAllForUGI(ugi); - } - } catch (IOException e) { - LOG.error("Failed to close FileSystem for: " + ugi, e); + Path path = new Path(request.getBulkToken()); + if (!fs.delete(path, true)) { + if (fs.exists(path)) { + throw new IOException("Failed to clean up " + path); } } + LOG.trace("Cleaned up {} successfully.", path); } private Consumer fsCreatedListener; @@ -281,6 +270,13 @@ public class SecureBulkLoadManager { public Map> run() { FileSystem fs = null; try { + /* + * This is creating and caching a new FileSystem instance. Other code called + * "beneath" this method will rely on this FileSystem instance being in the + * cache. This is important as those methods make _no_ attempt to close this + * FileSystem instance. It is critical that here, in SecureBulkLoadManager, + * we are tracking the lifecycle and closing the FS when safe to do so. + */ fs = FileSystem.get(conf); for(Pair el: familyPaths) { Path stageFamily = new Path(bulkToken, Bytes.toString(el.getFirst())); @@ -305,6 +301,13 @@ public class SecureBulkLoadManager { }); } finally { decrementUgiReference(ugi); + try { + if (!UserGroupInformation.getLoginUser().equals(ugi) && !isUserReferenced(ugi)) { + FileSystem.closeAllForUGI(ugi); + } + } catch (IOException e) { + LOG.error("Failed to close FileSystem for: {}", ugi, e); + } if (region.getCoprocessorHost() != null) { region.getCoprocessorHost().postBulkLoadHFile(familyPaths, map); }