openwhisk-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jeremiaswer...@apache.org
Subject [incubator-openwhisk] branch master updated: Remove broken containers if docker run fails (#2870)
Date Wed, 25 Oct 2017 16:01:24 GMT
This is an automated email from the ASF dual-hosted git repository.

jeremiaswerner pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk.git


The following commit(s) were added to refs/heads/master by this push:
     new e666186  Remove broken containers if docker run fails (#2870)
e666186 is described below

commit e6661864f2857b1a86f3aea970d27bc7f51c7d47
Author: Sven Lange-Last <sven.lange-last@de.ibm.com>
AuthorDate: Wed Oct 25 18:01:21 2017 +0200

    Remove broken containers if docker run fails (#2870)
    
    * Remove broken containers if docker run fails
    
    If `docker run` fails with exit code 125 and prints a valid container ID to stdout, a
broken Docker container was left behind. This PR detects the described situation and immediately
removes the broken container to save disk space. In addition, the Docker daemon seems to slow
down with the number of containers, so cleaning up broken containers can also have a positive
performance impact.
    
    The main root cause for broken containers is a failure when extracting the container's
file system - for example, due to missing disk space or exhausted inodes. This PR does fully
resolve low disk space or exhausted inodes but it can help relax such situations. For example,
if a container with a very large file system cannot be created, cleaning it up may enable
the creation of a smaller container.
    
    * Address review feedback
    
    * Address review feedback
---
 .../core/containerpool/docker/DockerClient.scala   | 37 ++++++++++++-
 .../containerpool/docker/DockerContainer.scala     |  9 +++-
 .../docker/test/DockerClientTests.scala            | 62 +++++++++++++++++++++-
 .../docker/test/DockerContainerTests.scala         | 25 ++++++++-
 4 files changed, 127 insertions(+), 6 deletions(-)

diff --git a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerClient.scala
b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerClient.scala
index d714a9f..bf94325 100644
--- a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerClient.scala
+++ b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerClient.scala
@@ -35,6 +35,18 @@ import scala.collection.concurrent.TrieMap
 import whisk.core.containerpool.ContainerId
 import whisk.core.containerpool.ContainerAddress
 
+object DockerContainerId {
+
+  val containerIdRegex = """^([0-9a-f]{64})$""".r
+
+  def parse(id: String): Try[ContainerId] = {
+    id match {
+      case containerIdRegex(_) => Success(ContainerId(id))
+      case _                   => Failure(new IllegalArgumentException(s"Does not comply
with Docker container ID format: ${id}"))
+    }
+  }
+}
+
 /**
  * Serves as interface to the docker CLI tool.
  *
@@ -63,8 +75,26 @@ class DockerClient(dockerHost: Option[String] = None)(executionContext:
Executio
     Seq(dockerBin) ++ host
   }
 
-  def run(image: String, args: Seq[String] = Seq.empty[String])(implicit transid: TransactionId):
Future[ContainerId] =
-    runCmd((Seq("run", "-d") ++ args ++ Seq(image)): _*).map(ContainerId.apply)
+  def run(image: String, args: Seq[String] = Seq.empty[String])(
+    implicit transid: TransactionId): Future[ContainerId] = {
+    runCmd((Seq("run", "-d") ++ args ++ Seq(image)): _*)
+      .map {
+        ContainerId(_)
+      }
+      .recoverWith {
+        // https://docs.docker.com/v1.12/engine/reference/run/#/exit-status
+        // Exit code 125 means an error reported by the Docker daemon.
+        // Examples:
+        // - Unrecognized option specified
+        // - Not enough disk space
+        case pre: ProcessRunningException if pre.exitCode == 125 =>
+          Future.failed(
+            DockerContainerId
+              .parse(pre.stdout)
+              .map(BrokenDockerContainer(_, s"Broken container: ${pre.getMessage}"))
+              .getOrElse(pre))
+      }
+  }
 
   def inspectIPAddress(id: ContainerId, network: String)(implicit transid: TransactionId):
Future[ContainerAddress] =
     runCmd("inspect", "--format", s"{{.NetworkSettings.Networks.${network}.IPAddress}}",
id.asString).flatMap {
@@ -190,3 +220,6 @@ trait DockerApi {
    */
   def isOomKilled(id: ContainerId)(implicit transid: TransactionId): Future[Boolean]
 }
+
+/** Indicates any error while starting a container that leaves a broken container behind
that needs to be removed */
+case class BrokenDockerContainer(id: ContainerId, msg: String) extends Exception(msg)
diff --git a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
index a7b1785..4827eb5 100644
--- a/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
+++ b/core/invoker/src/main/scala/whisk/core/containerpool/docker/DockerContainer.scala
@@ -98,7 +98,14 @@ object DockerContainer {
     for {
       _ <- pulled
       id <- docker.run(image, args).recoverWith {
-        case _ => Future.failed(WhiskContainerStartupError(s"Failed to run container with
image '${image}'."))
+        case BrokenDockerContainer(brokenId, message) =>
+          // Remove the broken container - but don't wait or check for the result.
+          // If the removal fails, there is nothing we could do to recover from the recovery.
+          docker.rm(brokenId)
+          Future.failed(
+            WhiskContainerStartupError(s"Failed to run container with image '${image}'. Removing
broken container."))
+        case _ =>
+          Future.failed(WhiskContainerStartupError(s"Failed to run container with image '${image}'."))
       }
       ip <- docker.inspectIPAddress(id, network).recoverWith {
         // remove the container immediately if inspect failed as
diff --git a/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerClientTests.scala
b/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerClientTests.scala
index 1235b92..8f447cf 100644
--- a/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerClientTests.scala
+++ b/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerClientTests.scala
@@ -23,6 +23,8 @@ import scala.concurrent.ExecutionContext.Implicits.global
 import scala.concurrent.Future
 import scala.concurrent.duration.DurationInt
 import scala.concurrent.duration.FiniteDuration
+import scala.concurrent.Promise
+import scala.util.Success
 import org.junit.runner.RunWith
 import org.scalatest.BeforeAndAfterEach
 import org.scalatest.FlatSpec
@@ -33,10 +35,12 @@ import whisk.common.LogMarker
 import whisk.common.LoggingMarkers.INVOKER_DOCKER_CMD
 import whisk.common.TransactionId
 import whisk.core.containerpool.docker.DockerClient
-import scala.concurrent.Promise
 import whisk.core.containerpool.ContainerId
 import whisk.core.containerpool.ContainerAddress
 import whisk.utils.retry
+import whisk.core.containerpool.docker.ProcessRunningException
+import whisk.core.containerpool.docker.DockerContainerId
+import whisk.core.containerpool.docker.BrokenDockerContainer
 
 @RunWith(classOf[JUnitRunner])
 class DockerClientTests extends FlatSpec with Matchers with StreamLogging with BeforeAndAfterEach
{
@@ -44,7 +48,7 @@ class DockerClientTests extends FlatSpec with Matchers with StreamLogging
with B
   override def beforeEach = stream.reset()
 
   implicit val transid = TransactionId.testing
-  val id = ContainerId("Id")
+  val id = ContainerId("55db56ee082239428b27d3728b4dd324c09068458aad9825727d5bfc1bba6d52")
 
   val commandTimeout = 500.milliseconds
   def await[A](f: Future[A], timeout: FiniteDuration = commandTimeout) = Await.result(f,
timeout)
@@ -57,6 +61,31 @@ class DockerClientTests extends FlatSpec with Matchers with StreamLogging
with B
     override def executeProcess(args: String*)(implicit ec: ExecutionContext) = execResult
   }
 
+  behavior of "DockerContainerId"
+
+  it should "convert a proper container ID" in {
+    DockerContainerId.parse(id.asString) shouldBe Success(id)
+  }
+
+  it should "reject improper container IDs with IllegalArgumentException" in {
+    def verifyFailure(improperId: String) = {
+      val iae = the[IllegalArgumentException] thrownBy DockerContainerId.parse(improperId).get
+      iae.getMessage should include(improperId)
+    }
+
+    Seq[(String, String)](
+      ("", "String empty (too short)"),
+      ("1" * 63, "String too short"),
+      ("1" * 65, "String too long"),
+      (("1" * 63) + "x", "Improper characters"),
+      ("abcxdef", "Improper characters and too short")).foreach {
+      case (improperId, clue) =>
+        withClue(s"${clue} - length('${improperId}') = ${improperId.length}: ") {
+          verifyFailure(improperId)
+        }
+    }
+  }
+
   behavior of "DockerClient"
 
   it should "return a list of containers and pass down the correct arguments when using 'ps'"
in {
@@ -201,4 +230,33 @@ class DockerClientTests extends FlatSpec with Matchers with StreamLogging
with B
     runAndVerify(dc.run("image"), "run")
     runAndVerify(dc.pull("image"), "pull")
   }
+
+  it should "fail with BrokenDockerContainer when run returns with exit code 125 and a container
ID" in {
+    val dc = dockerClient {
+      Future.failed(
+        ProcessRunningException(
+          exitCode = 125,
+          stdout = id.asString,
+          stderr =
+            """/usr/bin/docker: Error response from daemon: mkdir /var/run/docker.1.1/libcontainerd.1.1/55db56ee082239428b27d3728b4dd324c09068458aad9825727d5bfc1bba6d52:
no space left on device."""))
+    }
+    val bdc = the[BrokenDockerContainer] thrownBy await(dc.run("image", Seq()))
+    bdc.id shouldBe id
+  }
+
+  it should "fail with ProcessRunningException when run returns with exit code !=125 or no
container ID" in {
+    def runAndVerify(pre: ProcessRunningException, clue: String) = {
+      val dc = dockerClient { Future.failed(pre) }
+      withClue(s"${clue} - exitCode = ${pre.exitCode}, stdout = '${pre.stdout}', stderr =
'${pre.stderr}': ") {
+        the[ProcessRunningException] thrownBy await(dc.run("image", Seq())) shouldBe pre
+      }
+    }
+
+    Seq[(ProcessRunningException, String)](
+      (ProcessRunningException(126, id.asString, "Unknown command"), "Exit code not 125"),
+      (ProcessRunningException(125, "", "Unknown flag: --foo"), "No container ID"),
+      (ProcessRunningException(1, "", ""), "Exit code not 125 and no container ID")).foreach
{
+      case (pre, clue) => runAndVerify(pre, clue)
+    }
+  }
 }
diff --git a/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
b/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
index af16af3..8ded76e 100644
--- a/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
+++ b/tests/src/test/scala/whisk/core/containerpool/docker/test/DockerContainerTests.scala
@@ -189,7 +189,7 @@ class DockerContainerTests
       override def run(image: String,
                        args: Seq[String] = Seq.empty[String])(implicit transid: TransactionId):
Future[ContainerId] = {
         runs += ((image, args))
-        Future.failed(new RuntimeException())
+        Future.failed(ProcessRunningException(1, "", ""))
       }
     }
     implicit val runc = stub[RuncApi]
@@ -207,6 +207,29 @@ class DockerContainerTests
     docker.rms should have size 0
   }
 
+  it should "remove the container if run fails with a broken container" in {
+    implicit val docker = new TestDockerClient {
+      override def run(image: String,
+                       args: Seq[String] = Seq.empty[String])(implicit transid: TransactionId):
Future[ContainerId] = {
+        runs += ((image, args))
+        Future.failed(BrokenDockerContainer(containerId, "Broken container"))
+      }
+    }
+    implicit val runc = stub[RuncApi]
+
+    val container = DockerContainer.create(
+      transid = transid,
+      image = "image",
+      userProvidedImage = false,
+      dockerRunParameters = parameters)
+    a[WhiskContainerStartupError] should be thrownBy await(container)
+
+    docker.pulls should have size 0
+    docker.runs should have size 1
+    docker.inspects should have size 0
+    docker.rms should have size 1
+  }
+
   it should "provide a proper error if inspect fails for blackbox containers" in {
     implicit val docker = new TestDockerClient {
       override def inspectIPAddress(id: ContainerId,

-- 
To stop receiving notification emails like this one, please contact
['"commits@openwhisk.apache.org" <commits@openwhisk.apache.org>'].

Mime
View raw message