reef-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (REEF-2016) Fix CreateUriForPath method for different IFileSystem implements to not prepend baseUri prefix if it already contains it
Date Fri, 15 Jun 2018 21:25:00 GMT

    [ https://issues.apache.org/jira/browse/REEF-2016?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16514374#comment-16514374
] 

ASF GitHub Bot commented on REEF-2016:
--------------------------------------

bgchun closed pull request #1476: [REEF-2016] Fix CreateUriForPath methods of different IFileSystems
to not append prefix if given path already contains it
URL: https://github.com/apache/reef/pull/1476
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/lang/cs/Org.Apache.REEF.IO.Tests/TestAzureBlockBlobFileSystem.cs b/lang/cs/Org.Apache.REEF.IO.Tests/TestAzureBlockBlobFileSystem.cs
index 6623051792..c5b23b9858 100644
--- a/lang/cs/Org.Apache.REEF.IO.Tests/TestAzureBlockBlobFileSystem.cs
+++ b/lang/cs/Org.Apache.REEF.IO.Tests/TestAzureBlockBlobFileSystem.cs
@@ -35,12 +35,22 @@ namespace Org.Apache.REEF.IO.Tests
     /// </summary>
     public sealed class TestAzureBlockBlobFileSystem
     {
-        private static readonly Uri FakeUri = new Uri("http://fake.com/container/file");
+        private static readonly Uri FakeUri = new Uri("http://storageacct.blob.core.windows.net/container/file");
+        private TestContext testContext;
+
+        private static Uri BaseUri
+        {
+            get { return new Uri(FakeUri.GetLeftPart(UriPartial.Authority)); }
+        }
+
+        public TestAzureBlockBlobFileSystem()
+        {
+            this.testContext = new TestContext();
+        }
 
         [Fact]
         public void TestCreate()
         {
-            var testContext = new TestContext();
             Stream stream = testContext.GetAzureFileSystem().Create(FakeUri);
             testContext.TestCloudBlockBlob.Received(1).Create();
             Assert.Equal(testContext.TestCreateStream, stream);
@@ -49,7 +59,6 @@ public void TestCreate()
         [Fact]
         public void TestOpen()
         {
-            var testContext = new TestContext();
             Stream stream = testContext.GetAzureFileSystem().Open(FakeUri);
             testContext.TestCloudBlockBlob.Received(1).Open();
             Assert.Equal(testContext.TestOpenStream, stream);
@@ -58,7 +67,6 @@ public void TestOpen()
         [Fact]
         public void TestDelete()
         {
-            var testContext = new TestContext();
             testContext.GetAzureFileSystem().Delete(FakeUri);
             testContext.TestCloudBlockBlob.Received(1).Delete();
         }
@@ -66,7 +74,6 @@ public void TestDelete()
         [Fact]
         public void TestExists()
         {
-            var testContext = new TestContext();
             testContext.GetAzureFileSystem().Exists(FakeUri);
             testContext.TestCloudBlockBlob.Received(1).Exists();
         }
@@ -74,7 +81,6 @@ public void TestExists()
         [Fact]
         public void TestCopyToLocal()
         {
-            var testContext = new TestContext();
             testContext.GetAzureFileSystem().CopyToLocal(FakeUri, "local");
             testContext.TestCloudBlockBlob.Received(1).DownloadToFile("local", FileMode.CreateNew);
         }
@@ -82,7 +88,6 @@ public void TestCopyToLocal()
         [Fact]
         public void TestCopyFromLocal()
         {
-            var testContext = new TestContext();
             testContext.GetAzureFileSystem().CopyFromLocal("local", FakeUri);
             testContext.TestCloudBlockBlob.Received(1).UploadFromFile("local", FileMode.Open);
         }
@@ -113,7 +118,6 @@ public void TestDeleteDirectoryAtContainer()
         [Fact]
         public void TestDeleteDirectoryRecursive()
         {
-            var testContext = new TestContext();
             testContext.TestCloudBlobDirectory.ListBlobs(true).ReturnsForAnyArgs(Enumerable.Repeat(testContext.TestCloudBlob,
5));
             testContext.GetAzureFileSystem().DeleteDirectory(new Uri("http://test.com/container/directory/directory"));
             testContext.TestCloudBlobClient.Received(1).GetContainerReference("container");
@@ -124,10 +128,43 @@ public void TestDeleteDirectoryRecursive()
         }
 
         [Fact]
-        public void TestCreateUriForPath()
+        public void TestCreateUriForAbsolutePathInvalid()
         {
-            var testContext = new TestContext();
-            Assert.Equal(FakeUri, testContext.GetAzureFileSystem().CreateUriForPath(FakeUri.LocalPath));
+            Assert.Throws<ArgumentException>(() => testContext.GetAzureFileSystem().CreateUriForPath("http://www.invalidstorageaccount.com/container/folder1/file1.txt"));
+        }
+
+        [Fact]
+        public void TestCreateUriForAbsolutePath()
+        {
+            Uri uri = new Uri("http://storageacct.blob.core.windows.net/container/folder1/folder2/file.txt");
+            Uri resultUri = testContext.GetAzureFileSystem().CreateUriForPath(uri.AbsoluteUri);
+            Assert.Equal<Uri>(uri, resultUri);
+        }
+
+        [Fact]
+        public void TestCreateUriForRelativePathValid()
+        {
+            string relativePath = "container/folder1/folder2/file.txt";
+            Uri expectedUri = new Uri(BaseUri, relativePath);
+            Uri resultUri = testContext.GetAzureFileSystem().CreateUriForPath(relativePath);
+            Assert.Equal<Uri>(expectedUri, resultUri);
+        }
+
+        [Fact]
+        public void TestCreateUriForRelativePathWithContainerNameTooSmall()
+        {
+            // Container name must be atleast 3 characters.
+            string relativePath = "c1/folder1/folder2/file.txt";
+            Uri expectedUri = new Uri(BaseUri, relativePath);
+            Assert.Throws<ArgumentException>(() => testContext.GetAzureFileSystem().CreateUriForPath(relativePath));
+        }
+
+        [Fact]
+        public void TestCreateUriForRelativePathWithInvalidContainerName()
+        {
+            // Container name cannot contain a colon character.
+            string relativePath = "c:/folder1/folder2/file.txt";
+            Assert.Throws<ArgumentException>(() => testContext.GetAzureFileSystem().CreateUriForPath(relativePath));
         }
 
         private sealed class TestContext
@@ -149,7 +186,7 @@ public IFileSystem GetAzureFileSystem()
                 var injector = TangFactory.GetTang().NewInjector(conf);
                 injector.BindVolatileInstance(TestCloudBlobClient);
                 var fs = injector.GetInstance<AzureBlockBlobFileSystem>();
-                TestCloudBlobClient.BaseUri.ReturnsForAnyArgs(new Uri(FakeUri.GetLeftPart(UriPartial.Authority)));
+                TestCloudBlobClient.BaseUri.ReturnsForAnyArgs(BaseUri);
                 TestCloudBlockBlob.Open().Returns(TestOpenStream);
                 TestCloudBlockBlob.Create().Returns(TestCreateStream);
                 TestCloudBlockBlob.Blob.ReturnsForAnyArgs(new CloudBlockBlob(FakeUri));
diff --git a/lang/cs/Org.Apache.REEF.IO.Tests/TestAzureDataLakeFileSystem.cs b/lang/cs/Org.Apache.REEF.IO.Tests/TestAzureDataLakeFileSystem.cs
index 821e6aebd8..7199f9efe6 100644
--- a/lang/cs/Org.Apache.REEF.IO.Tests/TestAzureDataLakeFileSystem.cs
+++ b/lang/cs/Org.Apache.REEF.IO.Tests/TestAzureDataLakeFileSystem.cs
@@ -28,9 +28,21 @@ namespace Org.Apache.REEF.IO.Tests
 {
     public sealed class TestAzureDataLakeFileSystem
     {
-        private static readonly Uri FakeBaseUri = new Uri("http://fakeadls.com");
-        private static readonly Uri FakeDirUri = new Uri(FakeBaseUri, "dir");
-        private static readonly Uri FakeFileUri = new Uri($"{FakeDirUri}/fakefile");
+        private Uri FakeBaseUri
+        {
+            get { return new Uri("adl://" + new TestContext().AdlAccountFQDN); }
+        }
+
+        private Uri FakeDirUri
+        {
+            get { return new Uri(FakeBaseUri, "dir"); }
+        }
+
+        private Uri FakeFileUri
+        {
+            get { return new Uri($"{FakeDirUri}/fakefile"); }
+        }
+
         private readonly TestContext _context = new TestContext();
         private readonly AzureDataLakeFileSystem _fs;
 
@@ -195,11 +207,25 @@ public void TestGetChildrenException()
         }
 
         [Fact]
-        public void TestCreateUriForPath()
+        public void TestCreateUriForPathNoPrefix()
         {
             string dirStructure = FakeFileUri.AbsolutePath;
             Uri createdUri = _fs.CreateUriForPath(dirStructure);
-            Assert.Equal(createdUri, new Uri($"adl://{_context.AdlAccountName}{dirStructure}"));
+            Assert.Equal(new Uri(FakeBaseUri, dirStructure), createdUri);
+        }
+
+        [Fact]
+        public void TestCreateUriForPathWithPrefix()
+        {
+            Uri createdUri = _fs.CreateUriForPath(FakeFileUri.AbsolutePath);
+            Assert.Equal(FakeFileUri, createdUri);
+        }
+
+        [Fact]
+        public void TestCreateUriForPathWithInvalidPrefix()
+        {
+            // Scheme for adl accounts must start with adl and not http
+            Assert.Throws<ArgumentException>(() => _fs.CreateUriForPath("http://invalidadlaccount.net/driver3.txt"));
         }
 
         [Fact]
@@ -210,13 +236,18 @@ public void TestGetFileStatusThrowsException()
 
         private sealed class TestContext
         {
-            public readonly string AdlAccountName = "adlAccount";
+            public readonly string AdlAccountName = "adlaccount";
             public readonly AdlsClient MockAdlsClient = Microsoft.Azure.DataLake.Store.MockAdlsFileSystem.MockAdlsClient.GetMockClient();
 
+            public string AdlAccountFQDN
+            {
+                get { return $"{AdlAccountName}.azuredatalakestore.net"; }
+            }
+
             public AzureDataLakeFileSystem GetAdlsFileSystem()
             {
                 var conf = AzureDataLakeFileSystemConfiguration.ConfigurationModule
-                     .Set(AzureDataLakeFileSystemConfiguration.DataLakeStorageAccountName,
"adlsAccountName")
+                     .Set(AzureDataLakeFileSystemConfiguration.DataLakeStorageAccountName,
AdlAccountFQDN)
                     .Set(AzureDataLakeFileSystemConfiguration.Tenant, "tenant")
                     .Set(AzureDataLakeFileSystemConfiguration.ClientId, "clientId")
                     .Set(AzureDataLakeFileSystemConfiguration.SecretKey, "secretKey")
@@ -225,7 +256,7 @@ public AzureDataLakeFileSystem GetAdlsFileSystem()
                 var testDataLakeStoreClient = Substitute.For<IDataLakeStoreClient>();
                 injector.BindVolatileInstance(testDataLakeStoreClient);
                 testDataLakeStoreClient.GetAdlsClient().ReturnsForAnyArgs(MockAdlsClient);
-                testDataLakeStoreClient.AccountFQDN.Returns(AdlAccountName);
+                testDataLakeStoreClient.AccountFQDN.Returns(AdlAccountFQDN);
                 var fs = injector.GetInstance<AzureDataLakeFileSystem>();
                 return fs;
             }
diff --git a/lang/cs/Org.Apache.REEF.IO.Tests/TestHadoopFileSystem.cs b/lang/cs/Org.Apache.REEF.IO.Tests/TestHadoopFileSystem.cs
index 889fe10e93..94297a55e8 100644
--- a/lang/cs/Org.Apache.REEF.IO.Tests/TestHadoopFileSystem.cs
+++ b/lang/cs/Org.Apache.REEF.IO.Tests/TestHadoopFileSystem.cs
@@ -18,6 +18,7 @@
 using System;
 using System.IO;
 using System.Linq;
+using System.Text.RegularExpressions;
 using Org.Apache.REEF.IO.FileSystem;
 using Org.Apache.REEF.IO.FileSystem.Hadoop;
 using Org.Apache.REEF.Tang.Annotations;
@@ -142,6 +143,26 @@ public void TestCreate()
             Assert.Throws<NotImplementedException>(() => _fileSystem.Create(GetTempUri()));
         }
 
+        [Fact(Skip = "These tests need to be run in an environment with HDFS installed.")]
+        public void CreateUriForPathNoPrefix()
+        {
+            Uri uri = _fileSystem.CreateUriForPath("/tmp/TestHadoop");
+            Assert.True(new Regex("hdfs://[a-z]+:\\d+/tmp/TestHadoop").Match(uri.AbsoluteUri).Success);
+        }
+
+        [Fact(Skip = "These tests need to be run in an environment with HDFS installed.")]
+        public void TestCreateUriForPathInvalid()
+        {
+            Assert.Throws<ArgumentException>(() => _fileSystem.CreateUriForPath("http://www.invalidhadoopfs.com/container/folder1/file1.txt"));
+        }
+
+        [Fact(Skip = "These tests need to be run in an environment with HDFS installed.")]
+        public void CreateUriForPathWithPrefix()
+        {
+            string uriString = "hdfs://localhost:9000/tmp/TestHadoop";
+            Assert.Equal(new Uri(uriString), _fileSystem.CreateUriForPath(uriString));
+        }
+
         /// <summary>
         /// This test is to make sure with the HadoopFileSystemConfiguration, HadoopFileSystem
can be injected.
         /// </summary>
diff --git a/lang/cs/Org.Apache.REEF.IO/FileSystem/AzureBlob/AzureBlockBlobFileSystem.cs b/lang/cs/Org.Apache.REEF.IO/FileSystem/AzureBlob/AzureBlockBlobFileSystem.cs
index d333839539..9292f42296 100644
--- a/lang/cs/Org.Apache.REEF.IO/FileSystem/AzureBlob/AzureBlockBlobFileSystem.cs
+++ b/lang/cs/Org.Apache.REEF.IO/FileSystem/AzureBlob/AzureBlockBlobFileSystem.cs
@@ -35,6 +35,8 @@ internal sealed class AzureBlockBlobFileSystem : IFileSystem
     {
         private readonly ICloudBlobClient _client;
 
+        private const char UrlPathSeparator = '/';
+
         [Inject]
         private AzureBlockBlobFileSystem(ICloudBlobClient client)
         {
@@ -186,12 +188,48 @@ public IEnumerable<Uri> GetChildren(Uri directoryUri)
         /// <summary>
         /// Creates a Uri using the relative path to the remote file (including the container),
         /// getting the absolute URI from the Blob client's base URI.
+        /// If path is null or the prefix doesn't match the base uri in the FileSystem, throw
ArgumentException.
         /// </summary>
-        /// <param name="path">The relative path to the remote file, including the
container</param>
+        /// <param name="path">The relative or absolute path to the remote file, including
the container</param>
         /// <returns>The URI to the remote file</returns>
         public Uri CreateUriForPath(string path)
         {
-            return new Uri(_client.BaseUri.AbsoluteUri.TrimEnd('/') + '/' + path.Trim('/'));
+            if (path == null)
+            {
+                throw new ArgumentNullException(nameof(path), "Specified path is null");
+            }
+
+            Uri resultUri = null;
+            try
+            {
+                resultUri = new Uri(path);
+            }
+            catch (UriFormatException)
+            {
+                resultUri = new Uri(_client.BaseUri, path);
+            }
+
+            if (!resultUri.AbsoluteUri.StartsWith(_client.BaseUri.AbsoluteUri))
+            {
+                throw new ArgumentException($"Given URI must begin with valid prefix ({_client.BaseUri.AbsoluteUri})",
nameof(path));
+            }
+
+            if (resultUri.Segments.Count() < 2)
+            {
+                throw new ArgumentException("Input path must have a container name", nameof(path));
+            }
+
+            string containerName = resultUri.Segments[1].Trim(UrlPathSeparator);
+            NameValidator.ValidateContainerName(containerName);
+            NameValidator.ValidateBlobName(resultUri.PathAndQuery);
+
+            // If the last segment does not end with a '/', we require it to be a valid file
name.
+            if (!resultUri.PathAndQuery.EndsWith(UrlPathSeparator.ToString()))
+            {
+                NameValidator.ValidateFileName(resultUri.Segments[resultUri.Segments.Length
- 1]);
+            }
+
+            return resultUri;
         }
 
         /// <summary>
diff --git a/lang/cs/Org.Apache.REEF.IO/FileSystem/AzureDataLake/AzureDataLakeFileSystem.cs
b/lang/cs/Org.Apache.REEF.IO/FileSystem/AzureDataLake/AzureDataLakeFileSystem.cs
index 39d1f04c35..139c5b28b2 100644
--- a/lang/cs/Org.Apache.REEF.IO/FileSystem/AzureDataLake/AzureDataLakeFileSystem.cs
+++ b/lang/cs/Org.Apache.REEF.IO/FileSystem/AzureDataLake/AzureDataLakeFileSystem.cs
@@ -33,6 +33,11 @@ internal sealed class AzureDataLakeFileSystem : IFileSystem
         private readonly IDataLakeStoreClient _client;
         private readonly AdlsClient _adlsClient;
 
+        private string UriPrefix
+        {
+            get { return $"adl://{_client.AccountFQDN}"; }
+        }
+
         [Inject]
         private AzureDataLakeFileSystem(IDataLakeStoreClient client)
         {
@@ -192,7 +197,7 @@ public IEnumerable<Uri> GetChildren(Uri directoryUri)
         /// <summary>
         /// Create Uri from a given file path.
         /// The file path can be full with prefix or relative without prefix.
-        /// If null is passed as the path, ArgumentException will be thrown.
+        /// If path is null or the prefix doesn't match the prefix in the FileSystem, throw
ArgumentException.
         /// </summary>
         /// <exception cref="ArgumentNullException">If specified path is null</exception>
         public Uri CreateUriForPath(string path)
@@ -201,7 +206,24 @@ public Uri CreateUriForPath(string path)
             {
                 throw new ArgumentNullException(nameof(path), "Specified path is null");
             }
-            return new Uri($"{GetUriPrefix()}/{path.TrimStart('/')}");
+
+            Uri resultUri = null;
+
+            try
+            {
+                resultUri = new Uri(path);
+            }
+            catch (UriFormatException)
+            {
+                resultUri = new Uri(new Uri(this.UriPrefix), path);
+            }
+
+            if (!resultUri.AbsoluteUri.StartsWith(this.UriPrefix))
+            {
+                throw new ArgumentException($"Given URI must begin with valid prefix ({this.UriPrefix})",
nameof(path));
+            }
+
+            return resultUri;
         }
 
         /// <summary>
@@ -223,10 +245,5 @@ public FileStatus GetFileStatus(Uri remoteFileUri)
 
             return new FileStatus(entrySummary.LastModifiedTime.Value, entrySummary.Length);
         }
-
-        private string GetUriPrefix()
-        {
-            return $"adl://{_client.AccountFQDN}";
-        }
     }
 }
diff --git a/lang/cs/Org.Apache.REEF.IO/FileSystem/Hadoop/HadoopFileSystem.cs b/lang/cs/Org.Apache.REEF.IO/FileSystem/Hadoop/HadoopFileSystem.cs
index f7f8274248..cd80b629ab 100644
--- a/lang/cs/Org.Apache.REEF.IO/FileSystem/Hadoop/HadoopFileSystem.cs
+++ b/lang/cs/Org.Apache.REEF.IO/FileSystem/Hadoop/HadoopFileSystem.cs
@@ -55,22 +55,32 @@ private HadoopFileSystem(HdfsCommandRunner commandRunner)
         /// <summary>
         /// Create Uri from a given file name
         /// If the path already contains prefix, use it directly and verify the prefix after
it is created.
-        /// Otherwise add the prefix in fron of the relative path.
+        /// Otherwise add the prefix in front of the relative path.
         /// If path is null or the prefix doesn't match the prefix in the FileSystem, throw
ArgumentException
         /// </summary>
         /// <param name="path"></param>
         /// <returns></returns>
         public Uri CreateUriForPath(string path)
         {
-            Logger.Log(Level.Info, string.Format(CultureInfo.CurrentCulture, "CreateUriForPath
with path: {0}, _uriPrefix: {1}.", path, _uriPrefix));
             if (path == null)
             {
-                throw new ArgumentException("null path passed in CreateUriForPath");
+                throw new ArgumentNullException(nameof(path));
             }
 
-            Uri uri;
-            uri = new Uri(path);
-            Logger.Log(Level.Info, string.Format(CultureInfo.CurrentCulture, "Uri {0} created
in CreateUriForPath.", uri));
+            Uri uri = null;
+            try
+            {
+                uri = new Uri(path);
+            }
+            catch (UriFormatException)
+            {
+                uri = new Uri(new Uri(_uriPrefix), path);
+            }
+
+            if (!uri.AbsoluteUri.StartsWith(_uriPrefix))
+            {
+                throw new ArgumentException($"Given URI does not begin with valid prefix
({_uriPrefix})");
+            }
 
             return uri;
         }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> Fix CreateUriForPath method for different IFileSystem implements to not prepend baseUri
prefix if it already contains it
> ------------------------------------------------------------------------------------------------------------------------
>
>                 Key: REEF-2016
>                 URL: https://issues.apache.org/jira/browse/REEF-2016
>             Project: REEF
>          Issue Type: Bug
>          Components: REEF-IO
>            Reporter: Dwaipayan Mukhopadhyay
>            Assignee: Dwaipayan Mukhopadhyay
>            Priority: Major
>             Fix For: 0.17
>
>
> The _CreateUriForPath_(string path) method is summarised as follows
> /// Create Uri from a given file path.
> /// The file path can be full with prefix or relative without prefix.
> /// If null is passed as the path, ArgumentException will be thrown.
> Currently the AzureBlob, Azure Data Lake and HDFS implementations of IFileSystem treat
the path as relative and appends the baseUri prefix. If a full uri is passed with prefix
the returned Uri is incorrect. This needs to be fixed



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Mime
View raw message