cloudstack-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wid...@apache.org
Subject [15/15] git commit: updated refs/heads/rbd-snap-clone to 75c272d
Date Fri, 24 May 2013 10:10:01 GMT
rbd: Use cloning for deploying templates instead of a copy

RBD format 2 supports cloning (aka layering) where one base image can serve
as a parent image for multiple child images.

This enables fast deployment of a large number of virtual machines, and it also
saves space on the Ceph cluster and improves performance due to better caching.

Qemu-img doesn't support RBD format 2 (yet), so to enable these functions the
RADOS/RBD Java bindings are required.

This patch also enables deployment of System VMs on RBD storage pools. Since we
no longer require a patchdisk for passing the boot arguments, we are able to deploy
these VMs on RBD.


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/75c272db
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/75c272db
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/75c272db

Branch: refs/heads/rbd-snap-clone
Commit: 75c272dba1d51e9cb8423825d811e05e42968828
Parents: 09c6030
Author: Wido den Hollander <wido@42on.com>
Authored: Fri May 24 12:09:07 2013 +0200
Committer: Wido den Hollander <wido@widodh.nl>
Committed: Fri May 24 12:09:07 2013 +0200

----------------------------------------------------------------------
 .../allocator/AbstractStoragePoolAllocator.java    |    6 -
 plugins/hypervisors/kvm/pom.xml                    |    9 +
 .../kvm/resource/LibvirtComputingResource.java     |    7 +
 .../kvm/storage/LibvirtStorageAdaptor.java         |  248 +++++++++++++--
 pom.xml                                            |    1 +
 5 files changed, 246 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/75c272db/engine/storage/src/org/apache/cloudstack/storage/allocator/AbstractStoragePoolAllocator.java
----------------------------------------------------------------------
diff --git a/engine/storage/src/org/apache/cloudstack/storage/allocator/AbstractStoragePoolAllocator.java
b/engine/storage/src/org/apache/cloudstack/storage/allocator/AbstractStoragePoolAllocator.java
index 3a66b85..5326701 100755
--- a/engine/storage/src/org/apache/cloudstack/storage/allocator/AbstractStoragePoolAllocator.java
+++ b/engine/storage/src/org/apache/cloudstack/storage/allocator/AbstractStoragePoolAllocator.java
@@ -167,12 +167,6 @@ public abstract class AbstractStoragePoolAllocator extends AdapterBase
implement
             return false;
         }
         
-        DiskOfferingVO diskOffering = _diskOfferingDao.findById(dskCh.getDiskOfferingId());
-        if (diskOffering.getSystemUse() && pool.getPoolType() == StoragePoolType.RBD)
{
-            s_logger.debug("Skipping RBD pool " + pool.getName() + " as a suitable pool.
RBD is not supported for System VM's");
-            return false;
-        }
-
         
 		Long clusterId = pool.getClusterId();
 		ClusterVO cluster = _clusterDao.findById(clusterId);

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/75c272db/plugins/hypervisors/kvm/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/hypervisors/kvm/pom.xml b/plugins/hypervisors/kvm/pom.xml
index 613c817..1babe7c 100644
--- a/plugins/hypervisors/kvm/pom.xml
+++ b/plugins/hypervisors/kvm/pom.xml
@@ -24,6 +24,10 @@
       <id>libvirt-org</id>
       <url>http://libvirt.org/maven2</url>
     </repository>
+    <repository>
+      <id>ceph-com</id>
+      <url>http://ceph.com/maven</url>
+    </repository>
   </repositories>
   <dependencies>
     <dependency>
@@ -36,6 +40,11 @@
       <artifactId>libvirt</artifactId>
       <version>${cs.libvirt-java.version}</version>
     </dependency>
+    <dependency>
+      <groupId>com.ceph</groupId>
+      <artifactId>rados</artifactId>
+      <version>${cs.rados-java.version}</version>
+    </dependency>
   </dependencies>
   <build>
     <defaultGoal>install</defaultGoal>

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/75c272db/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
----------------------------------------------------------------------
diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
index 1e20d75..c34d1eb 100755
--- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
+++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
@@ -1253,6 +1253,13 @@ ServerResource {
     }
 
     private CopyVolumeAnswer execute(CopyVolumeCommand cmd) {
+       /**
+            This method is only used for copying files from Primary Storage TO Secondary
Storage
+
+            It COULD also do it the other way around, but the code in the ManagementServerImpl
shows
+            that it always sets copyToSecondary to true
+
+         */
         boolean copyToSecondary = cmd.toSecondaryStorage();
         String volumePath = cmd.getVolumePath();
         StorageFilerTO pool = cmd.getPool();

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/75c272db/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java
----------------------------------------------------------------------
diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java
b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java
index e7e4bbf..fa2f670 100644
--- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java
+++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java
@@ -17,6 +17,9 @@
 package com.cloud.hypervisor.kvm.storage;
 
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.BufferedInputStream;
+import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
@@ -37,6 +40,12 @@ import org.libvirt.StoragePool;
 import org.libvirt.StoragePoolInfo;
 import org.libvirt.StorageVol;
 import org.libvirt.StoragePoolInfo.StoragePoolState;
+import com.ceph.rados.Rados;
+import com.ceph.rados.RadosException;
+import com.ceph.rados.IoCTX;
+import com.ceph.rbd.Rbd;
+import com.ceph.rbd.RbdImage;
+import com.ceph.rbd.RbdException;
 
 import com.cloud.agent.api.ManageSnapshotCommand;
 import com.cloud.hypervisor.kvm.resource.LibvirtConnection;
@@ -63,6 +72,8 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
     private String _mountPoint = "/mnt";
     private String _manageSnapshotPath;
 
+    private String rbdTemplateSnapName = "cloudstack-base-snap";
+
     public LibvirtStorageAdaptor(StorageLayer storage) {
         _storageLayer = storage;
         _manageSnapshotPath = Script.findScript("scripts/storage/qcow2/",
@@ -638,6 +649,15 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
         }
     }
 
+    /**
+     * This function copies a physical disk from Secondary Storage to Primary Storage
+     * or from Primary to Primary Storage
+     *
+     * The first time a template is deployed in Primary Storage it will be copied from
+     * Secondary to Primary.
+     *
+     * If it has been created on Primary Storage, it will be copied on the Primary Storage
+     */
     @Override
     public KVMPhysicalDisk createDiskFromTemplate(KVMPhysicalDisk template,
             String name, PhysicalDiskFormat format, long size, KVMStoragePool destPool) {
@@ -690,21 +710,118 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
 
                 if (srcPool.getType() != StoragePoolType.RBD) {
                     srcFile = new QemuImgFile(template.getPath(), template.getFormat());
+                    qemu.convert(srcFile, destFile);
                 } else {
-                    template.setFormat(PhysicalDiskFormat.RAW);
-                    srcFile = new QemuImgFile(KVMPhysicalDisk.RBDStringBuilder(srcPool.getSourceHost(),
-                            srcPool.getSourcePort(),
-                            srcPool.getAuthUserName(),
-                            srcPool.getAuthSecret(),
-                            template.getPath()));
-                    srcFile.setFormat(template.getFormat());
+
+                    /**
+                     * We have to find out if the source file is in the same RBD pool and
has
+                     * RBD format 2 before we can do a layering/clone operation on the RBD
image
+                     *
+                     * This will be the case when the template is already on Primary Storage
and
+                     * we want to copy it
+                     */
+
+                    /* Feature 1<<0 means layering in RBD format 2 */
+                    int rbdFeatures = (1<<0);
+                    /* Order 0 means 4MB blocks (the default) */
+                    int rbdOrder = 0;
+
+                    try {
+                        if ((srcPool.getSourceHost().equals(destPool.getSourceHost())) &&
(srcPool.getSourceDir().equals(destPool.getSourceDir()))) {
+                            /* We are on the same Ceph cluster, but we require RBD format
2 on the source image */
+                            s_logger.debug("Trying to perform a RBD clone (layering) since
we are operating in the same storage pool");
+   
+                            Rados r = new Rados(srcPool.getAuthUserName());
+                            r.confSet("mon_host", srcPool.getSourceHost() + ":" + srcPool.getSourcePort());
+                            r.confSet("key", srcPool.getAuthSecret());
+                            r.connect();
+                            s_logger.debug("Succesfully connected to Ceph cluster at " +
r.confGet("mon_host"));
+
+                            IoCTX io = r.ioCtxCreate(srcPool.getSourceDir());
+                            Rbd rbd = new Rbd(io);
+                            RbdImage srcImage = rbd.open(template.getName());
+
+                            if (srcImage.isOldFormat()) {
+                                /* The source image is RBD format 1, we have to do a regular
copy */
+                                s_logger.debug("The source image " + srcPool.getSourceDir()
+ "/" + template.getName()
+                                               + " is RBD format 1. We have to perform a
regular copy (" + template.getVirtualSize() + " bytes)");
+
+                                rbd.create(disk.getName(), template.getVirtualSize(), rbdFeatures,
rbdOrder);
+                                RbdImage destImage = rbd.open(disk.getName());
+
+                                s_logger.debug("Starting to copy " + srcImage.getName() +
 " to " + destImage.getName() + " in Ceph pool " + srcPool.getSourceDir());
+                                rbd.copy(srcImage, destImage);
+
+                                s_logger.debug("Finished copying " + srcImage.getName() +
 " to " + destImage.getName() + " in Ceph pool " + srcPool.getSourceDir());
+                                rbd.close(destImage);
+                            } else {
+                                s_logger.debug("The source image " + srcPool.getSourceDir()
+ "/" + template.getName()
+                                               + " is RBD format 2. We will perform a RBD
clone using snapshot "
+                                               + this.rbdTemplateSnapName);
+                                /* The source image is format 2, we can do a RBD snapshot+clone
(layering) */
+                                rbd.clone(template.getName(), this.rbdTemplateSnapName, io,
disk.getName(), rbdFeatures, rbdOrder);
+                                s_logger.debug("Succesfully cloned " + template.getName()
+ "@" + this.rbdTemplateSnapName + " to " + disk.getName());
+                            }
+
+                            rbd.close(srcImage);
+                            r.ioCtxDestroy(io);
+                        } else {
+                            /* The source pool or host is not the same Ceph cluster, we do
a simple copy with Qemu-Img */
+                            s_logger.debug("Both the source and destination are RBD, but
not the same Ceph cluster. Performing a copy");
+
+                            Rados rSrc = new Rados(srcPool.getAuthUserName());
+                            rSrc.confSet("mon_host", srcPool.getSourceHost() + ":" + srcPool.getSourcePort());
+                            rSrc.confSet("key", srcPool.getAuthSecret());
+                            rSrc.connect();
+                            s_logger.debug("Succesfully connected to source Ceph cluster
at " + rSrc.confGet("mon_host"));
+
+                            Rados rDest = new Rados(destPool.getAuthUserName());
+                            rDest.confSet("mon_host", destPool.getSourceHost() + ":" + destPool.getSourcePort());
+                            rDest.confSet("key", destPool.getAuthSecret());
+                            rDest.connect();
+                            s_logger.debug("Succesfully connected to source Ceph cluster
at " + rDest.confGet("mon_host"));
+
+                            IoCTX sIO = rSrc.ioCtxCreate(srcPool.getSourceDir());
+                            Rbd sRbd = new Rbd(sIO);
+
+                            IoCTX dIO = rDest.ioCtxCreate(destPool.getSourceDir());
+                            Rbd dRbd = new Rbd(dIO);
+
+                            s_logger.debug("Creating " + disk.getName() + " on the destination
cluster " + rDest.confGet("mon_host")
+                                           + " in pool " + destPool.getSourceDir());
+                            dRbd.create(disk.getName(), template.getVirtualSize(), rbdFeatures,
rbdOrder);
+
+                            RbdImage srcImage = sRbd.open(template.getName());
+                            RbdImage destImage = dRbd.open(disk.getName());
+
+                            s_logger.debug("Copying " + template.getName() + " from Ceph
cluster " + rSrc.confGet("mon_host") + " to " + disk.getName()
+                                           + " on cluster " + rDest.confGet("mon_host"));
+                            sRbd.copy(srcImage, destImage);
+
+                            sRbd.close(srcImage);
+                            dRbd.close(destImage);
+                        
+                            rSrc.ioCtxDestroy(sIO);
+                            rDest.ioCtxDestroy(dIO);
+                        }
+                    } catch (RadosException e) {
+                        s_logger.error("Failed to perform a RADOS action on the Ceph cluster,
the error was: " + e.getMessage());
+                        disk = null;
+                    } catch (RbdException e) {
+                        s_logger.error("Failed to perform a RBD action on the Ceph cluster,
the error was: " + e.getMessage());
+                        disk = null;
+                    }
                 }
-                qemu.convert(srcFile, destFile);
             }
         } catch (QemuImgException e) {
             s_logger.error("Failed to create " + disk.getPath() +
                     " due to a failed executing of qemu-img: " + e.getMessage());
         }
+
+        if (disk == null) {
+            throw new CloudRuntimeException("Failed to create " + disk.getPath() + " from
template " + template.getName());
+        }
+
         return disk;
     }
 
@@ -733,17 +850,26 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
         }
     }
 
+    /**
+     * This copies a volume from Primary Storage to Secondary Storage
+     *
+     * In theory it could also do it the other way around, but the current implementation
+     * in ManagementServerImpl shows that the destPool is always a Secondary Storage Pool
+     */
     @Override
     public KVMPhysicalDisk copyPhysicalDisk(KVMPhysicalDisk disk, String name,
             KVMStoragePool destPool) {
 
-        /*
+        /**
             With RBD you can't run qemu-img convert with an existing RBD image as destination
             qemu-img will exit with the error that the destination already exists.
             So for RBD we don't create the image, but let qemu-img do that for us.
 
             We then create a KVMPhysicalDisk object that we can return
-        */
+
+            It is however very unlikely that the destPool will be RBD, since it isn't supported
+            for Secondary Storage
+         */
 
         KVMPhysicalDisk newDisk;
         if (destPool.getType() != StoragePoolType.RBD) {
@@ -791,15 +917,97 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
                             + srcFile.getFileName() + " the error was: " + e.getMessage());
                 }
             }
+
+            try {
+                qemu.convert(srcFile, destFile);
+            } catch (QemuImgException e) {
+                s_logger.error("Failed to convert " + srcFile.getFileName() + " to "
+                        + destFile.getFileName() + " the error was: " + e.getMessage());
+            }
+
         } else if ((srcPool.getType() != StoragePoolType.RBD) && (destPool.getType()
== StoragePoolType.RBD))  {
-            srcFile = new QemuImgFile(sourcePath, sourceFormat);
-            destFile = new QemuImgFile(KVMPhysicalDisk.RBDStringBuilder(destPool.getSourceHost(),
-                    destPool.getSourcePort(),
-                    destPool.getAuthUserName(),
-                    destPool.getAuthSecret(),
-                    destPath));
-            destFile.setFormat(destFormat);
+            /**
+              * Qemu doesn't support writing to RBD format 2 directly, so we have to write
to a temporary RAW file first
+              * which we then convert to RBD format 2.
+              *
+              * A HUGE performance gain can be achieved here if QCOW2 -> RBD format 2
can be done in one step
+              */
+            s_logger.debug("The source image is not RBD, but the destination is. We will
convert into RBD format 2");
+            String tmpFile = "/tmp/" + name;
+            int rbdFeatures = (1<<0);
+            int rbdOrder = 0;
+
+            try {
+                srcFile = new QemuImgFile(sourcePath, sourceFormat);
+                destFile = new QemuImgFile(tmpFile);
+                s_logger.debug("Converting " + srcFile.getFileName() +  " to " + tmpFile
+  " as a temporary file for RBD conversion");
+                qemu.convert(srcFile, destFile);
+
+                // We now convert the temporary file to a RBD image with format 2
+                Rados r = new Rados(destPool.getAuthUserName());
+                r.confSet("mon_host", destPool.getSourceHost() + ":" + destPool.getSourcePort());
+                r.confSet("key", destPool.getAuthSecret());
+                r.connect();
+                s_logger.debug("Succesfully connected to Ceph cluster at " + r.confGet("mon_host"));
+
+                IoCTX io = r.ioCtxCreate(destPool.getSourceDir());
+                Rbd rbd = new Rbd(io);
+
+                s_logger.debug("Creating RBD image " + name + " in Ceph pool " + destPool.getSourceDir()
+ " with RBD format 2");
+                rbd.create(name, disk.getVirtualSize(), rbdFeatures, rbdOrder);
+
+                RbdImage image = rbd.open(name);
+
+                // We now read the temporary file and write it to the RBD image
+                File fh = new File(tmpFile);
+                BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fh));
+
+                int chunkSize = 4194304;
+                long offset = 0;
+                s_logger.debug("Reading temporary file " + tmpFile + " (" + fh.length() +
" bytes) into RBD image " + name + " in chunks of " + chunkSize + " bytes");
+                while(true) {
+                    byte[] buf = new byte[chunkSize];
+
+                    int bytes = bis.read(buf);
+                    if (bytes <= 0) {
+                        break;
+                    }
+                    image.write(buf, offset, bytes);
+                    offset += bytes;
+                }
+                s_logger.debug("Completed writing " + tmpFile + " to RBD image " + name +
". Bytes written: " + offset);
+                bis.close();
+                s_logger.debug("Removing temporary file " + tmpFile);
+                fh.delete();
+
+                /* Snapshot the image and protect that snapshot so we can clone (layer) from
it */
+                s_logger.debug("Creating RBD snapshot " + this.rbdTemplateSnapName + " on
image " + name);
+                image.snapCreate(this.rbdTemplateSnapName);
+                s_logger.debug("Protecting RBD snapshot " + this.rbdTemplateSnapName + "
on image " + name);
+                image.snapProtect(this.rbdTemplateSnapName);
+
+                rbd.close(image);
+                r.ioCtxDestroy(io);
+            } catch (QemuImgException e) {
+                s_logger.error("Failed to do a temp convert from " + srcFile.getFileName()
+ " to "
+                        + destFile.getFileName() + " the error was: " + e.getMessage());
+                newDisk = null;
+            } catch (RadosException e) {
+                s_logger.error("A Ceph RADOS operation failed (" + e.getReturnValue() + ").
The error was: " + e.getMessage());
+                newDisk = null;
+            } catch (RbdException e) {
+                s_logger.error("A Ceph RBD operation failed (" + e.getReturnValue() + ").
The error was: " + e.getMessage());
+                newDisk = null;
+            } catch (IOException e) {
+                s_logger.error("Failed reading the temporary file during the conversion to
RBD: " + e.getMessage());
+                newDisk = null;
+            }
+
         } else {
+            /**
+                We let Qemu-Img do the work here. Although we could work with librbd and
have that do the cloning
+                it doesn't benefit us. It's better to keep the current code in place which
works
+             */
             srcFile = new QemuImgFile(KVMPhysicalDisk.RBDStringBuilder(srcPool.getSourceHost(),
                     srcPool.getSourcePort(),
                     srcPool.getAuthUserName(),
@@ -812,17 +1020,19 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
                     destPool.getAuthSecret(),
                     destPath));
             destFile.setFormat(destFormat);
-        }
 
-        if (srcFile != null && destFile != null) {
             try {
                 qemu.convert(srcFile, destFile);
             } catch (QemuImgException e) {
                 s_logger.error("Failed to convert " + srcFile.getFileName() + " to "
                         + destFile.getFileName() + " the error was: " + e.getMessage());
+                newDisk = null;
             }
         }
 
+        if (newDisk == null) {
+            throw new CloudRuntimeException("Failed to copy " + disk.getPath() + " to " +
name);
+        }
 
         return newDisk;
     }

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/75c272db/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index d7e80d6..67d9576 100644
--- a/pom.xml
+++ b/pom.xml
@@ -83,6 +83,7 @@
     <cs.java-ipv6.version>0.10</cs.java-ipv6.version>
     <cs.replace.properties>build/replace.properties</cs.replace.properties>
     <cs.libvirt-java.version>0.4.9</cs.libvirt-java.version>
+    <cs.rados-java.version>0.1.1</cs.rados-java.version>
     <cs.target.dir>target</cs.target.dir>
     <cs.daemon.version>1.0.10</cs.daemon.version>
   </properties>


Mime
View raw message