[lxc-devel] [PATCH] implement loopback backing store
Serge Hallyn
serge.hallyn at ubuntu.com
Mon Jun 3 21:38:37 UTC 2013
Create a loopfile backed container by doing:
lxc-create -B loop -t template -n name
or
lxc-clone -B loop -o dir1 -n loop1
The rootfs in the configuration file will be
loop:/var/lib/lxc/loop1/rootdev
Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
doc/lxc-clone.sgml.in | 2 +-
doc/lxc-create.sgml.in | 2 +-
src/lxc/bdev.c | 309 +++++++++++++++++++++++++++++++++++++++++++++++--
src/lxc/bdev.h | 7 ++
src/lxc/lxc_clone.c | 2 +-
src/lxc/lxc_create.c | 12 +-
src/lxc/lxccontainer.c | 2 +-
7 files changed, 320 insertions(+), 16 deletions(-)
diff --git a/doc/lxc-clone.sgml.in b/doc/lxc-clone.sgml.in
index 2d2fda3..6885ff7 100644
--- a/doc/lxc-clone.sgml.in
+++ b/doc/lxc-clone.sgml.in
@@ -211,7 +211,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
default the same as the original container's is used. Note that
currently changing the backingstore is only supported for
overlayfs snapshots of directory backed containers. Valid
- backing stores include dir (directory), btrfs, lvm, zfs
+ backing stores include dir (directory), btrfs, lvm, zfs, loop
and overlayfs.
</para>
</listitem>
diff --git a/doc/lxc-create.sgml.in b/doc/lxc-create.sgml.in
index 3969246..f9b0228 100644
--- a/doc/lxc-create.sgml.in
+++ b/doc/lxc-create.sgml.in
@@ -125,7 +125,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
</term>
<listitem>
<para>
- 'backingstore' is one of 'none', 'dir', 'lvm', or 'btrfs'. The
+ 'backingstore' is one of 'none', 'dir', 'lvm', 'loop', or 'btrfs'. The
default is 'none', meaning that the container root filesystem
will be a directory under <filename>@LXCPATH@/container/rootfs</filename>.
'dir' has the same meaning as 'none', but also allows the optional
diff --git a/src/lxc/bdev.c b/src/lxc/bdev.c
index 5b89ba4..9dea57b 100644
--- a/src/lxc/bdev.c
+++ b/src/lxc/bdev.c
@@ -35,6 +35,8 @@
#include <sys/mount.h>
#include <sys/wait.h>
#include <libgen.h>
+#include <linux/loop.h>
+#include <dirent.h>
#include "lxc.h"
#include "config.h"
#include "conf.h"
@@ -72,9 +74,16 @@ static int do_rsync(const char *src, const char *dest)
exit(1);
}
-static int blk_getsize(const char *path, unsigned long *size)
+/*
+ * return the size of the device at bdev->src (a "loop:" prefix is skipped)
+ */
+static int blk_getsize(struct bdev *bdev, unsigned long *size)
{
int fd, ret;
+ char *path = bdev->src;
+
+ if (strcmp(bdev->type, "loop") == 0)
+ path = bdev->src + 5;
fd = open(path, O_RDONLY);
if (fd < 0)
@@ -177,6 +186,14 @@ static int do_mkfs(const char *path, const char *fstype)
if (pid > 0)
return wait_for_pid(pid);
+ // If the file is not a block device, we don't want mkfs to ask
+ // us about whether to proceed.
+ close(0);
+ close(1);
+ close(2);
+ open("/dev/zero", O_RDONLY);
+ open("/dev/null", O_RDWR);
+ open("/dev/null", O_RDWR);
execlp("mkfs", "mkfs", "-t", fstype, path, NULL);
exit(1);
}
@@ -218,10 +235,14 @@ static int detect_fs(struct bdev *bdev, char *type, int len)
pid_t pid;
FILE *f;
char *sp1, *sp2, *sp3, *line = NULL;
+ char *srcdev = bdev->src;
if (!bdev || !bdev->src || !bdev->dest)
return -1;
+ if (strcmp(bdev->type, "loop") == 0)
+ srcdev = bdev->src + 5;
+
if (pipe(p) < 0)
return -1;
if ((pid = fork()) < 0)
@@ -243,21 +264,21 @@ static int detect_fs(struct bdev *bdev, char *type, int len)
}
wait(&status);
type[len-1] = '\0';
- INFO("detected fstype %s for %s", type, bdev->src);
+ INFO("detected fstype %s for %s", type, srcdev);
return ret;
}
if (unshare(CLONE_NEWNS) < 0)
exit(1);
- ret = mount_unknow_fs(bdev->src, bdev->dest, 0);
+ ret = mount_unknow_fs(srcdev, bdev->dest, 0);
if (ret < 0) {
- ERROR("failed mounting %s onto %s to detect fstype", bdev->src, bdev->dest);
+ ERROR("failed mounting %s onto %s to detect fstype", srcdev, bdev->dest);
exit(1);
}
// if symlink, get the real dev name
char devpath[MAXPATHLEN];
- char *l = linkderef(bdev->src, devpath);
+ char *l = linkderef(srcdev, devpath);
if (!l)
exit(1);
f = fopen("/proc/self/mounts", "r");
@@ -881,7 +902,7 @@ static int lvm_clonepaths(struct bdev *orig, struct bdev *new, const char *oldna
return -1;
if (is_blktype(orig)) {
- if (!newsize && blk_getsize(orig->src, &size) < 0) {
+ if (!newsize && blk_getsize(orig, &size) < 0) {
ERROR("Error getting size of %s", orig->src);
return -1;
}
@@ -928,8 +949,8 @@ static int lvm_destroy(struct bdev *orig)
return wait_for_pid(pid);
}
-#define DEFAULT_LVM_SZ 1024000000
-#define DEFAULT_LVM_FSTYPE "ext3"
+#define DEFAULT_FS_SIZE 1024000000
+#define DEFAULT_FSTYPE "ext3"
static int lvm_create(struct bdev *bdev, const char *dest, const char *n,
struct bdev_specs *specs)
{
@@ -959,7 +980,7 @@ static int lvm_create(struct bdev *bdev, const char *dest, const char *n,
// lvm.fssize is in bytes.
sz = specs->u.lvm.fssize;
if (!sz)
- sz = DEFAULT_LVM_SZ;
+ sz = DEFAULT_FS_SIZE;
INFO("Error creating new lvm blockdev %s size %lu", bdev->src, sz);
if (do_lvm_create(bdev->src, sz) < 0) {
@@ -969,7 +990,7 @@ static int lvm_create(struct bdev *bdev, const char *dest, const char *n,
fstype = specs->u.lvm.fstype;
if (!fstype)
- fstype = DEFAULT_LVM_FSTYPE;
+ fstype = DEFAULT_FSTYPE;
if (do_mkfs(bdev->src, fstype) < 0) {
ERROR("Error creating filesystem type %s on %s", fstype,
bdev->src);
@@ -1289,6 +1310,272 @@ struct bdev_ops btrfs_ops = {
};
//
+// loopback dev ops
+//
+/*
+ * loop_detect: report whether @path refers to a loop backing store, i.e.
+ * whether it begins with the "loop:" prefix.
+ *
+ * Returns 1 if it does, 0 otherwise.
+ */
+static int loop_detect(const char *path)
+{
+	return strncmp(path, "loop:", 5) == 0;
+}
+
+/*
+ * find_free_loopdev: scan /dev for a loop device with no backing file.
+ *
+ * Every /dev entry whose name starts with "loop" is opened O_RDWR and probed
+ * with LOOP_GET_STATUS64. A probe that fails with ENXIO is taken to mean the
+ * device is currently unbound; any other outcome skips the entry.
+ *
+ * @retfd: on success receives the open fd of the chosen loop device; the
+ *         caller must close it.
+ * @namep: on success receives "/dev/<entry>"; the buffer must hold at least
+ *         100 bytes.
+ *
+ * Returns 0 on success, -1 if /dev cannot be opened or no entry qualifies.
+ */
+static int find_free_loopdev(int *retfd, char *namep)
+{
+	struct dirent *direntp;
+	struct loop_info64 lo;
+	DIR *dir;
+	int fd = -1;
+
+	if (!(dir = opendir("/dev"))) {
+		SYSERROR("Error opening /dev");
+		return -1;
+	}
+	// readdir() instead of the deprecated readdir_r(): this DIR stream is
+	// opened and closed inside this call and never shared.
+	while ((direntp = readdir(dir))) {
+		if (strncmp(direntp->d_name, "loop", 4) != 0)
+			continue;
+		if ((fd = openat(dirfd(dir), direntp->d_name, O_RDWR)) < 0)
+			continue;
+		if (ioctl(fd, LOOP_GET_STATUS64, &lo) == 0 || errno != ENXIO) {
+			// already bound, or not something we can use
+			close(fd);
+			fd = -1;
+			continue;
+		}
+		// We can use this fd
+		snprintf(namep, 100, "/dev/%s", direntp->d_name);
+		break;
+	}
+	// Close the stream on every path, so it is not leaked when the scan
+	// finds nothing. An fd obtained through openat() stays open.
+	closedir(dir);
+	if (fd == -1) {
+		ERROR("No loop device found");
+		return -1;
+	}
+
+	*retfd = fd;
+	return 0;
+}
+
+/*
+ * loop_mount: attach the backing file named by bdev->src (the part after the
+ * "loop:" prefix) to a free loop device and mount that device on bdev->dest.
+ *
+ * The loop device is flagged LO_FLAGS_AUTOCLEAR. On success its fd is kept
+ * open in bdev->lofd; on failure after a loop device was obtained, the fd is
+ * closed and bdev->lofd is set to -1.
+ *
+ * Returns the result of mount_unknow_fs() once the device is set up; -22 if
+ * bdev is not of type "loop", lacks src/dest, or no free loop device could be
+ * found; -1 if the backing file cannot be opened, attached or configured.
+ */
+static int loop_mount(struct bdev *bdev)
+{
+	int lfd, ffd = -1, ret = -1;
+	struct loop_info64 lo;
+	char loname[100];
+
+	if (strcmp(bdev->type, "loop"))
+		return -22;
+	if (!bdev->src || !bdev->dest)
+		return -22;
+	if (find_free_loopdev(&lfd, loname) < 0)
+		return -22;
+
+	if ((ffd = open(bdev->src + 5, O_RDWR)) < 0) {
+		SYSERROR("Error opening backing file %s\n", bdev->src);
+		goto out;
+	}
+
+	if (ioctl(lfd, LOOP_SET_FD, ffd) < 0) {
+		SYSERROR("Error attaching backing file to loop dev");
+		goto out;
+	}
+	memset(&lo, 0, sizeof(lo));
+	lo.lo_flags = LO_FLAGS_AUTOCLEAR;
+	if (ioctl(lfd, LOOP_SET_STATUS64, &lo) < 0) {
+		SYSERROR("Error setting autoclear on loop dev\n");
+		// Autoclear did not take effect, so closing lfd would leave the
+		// backing file attached. Detach it explicitly (best effort).
+		ioctl(lfd, LOOP_CLR_FD, 0);
+		goto out;
+	}
+
+	ret = mount_unknow_fs(loname, bdev->dest, 0);
+	if (ret < 0)
+		ERROR("Error mounting %s\n", bdev->src);
+	else
+		bdev->lofd = lfd;
+
+out:
+	// The loop device holds its own reference to the backing file.
+	if (ffd > -1)
+		close(ffd);
+	if (ret < 0) {
+		close(lfd);
+		bdev->lofd = -1;
+	}
+	return ret;
+}
+
+/*
+ * loop_umount: unmount bdev->dest and release the loop device fd held in
+ * bdev->lofd, if there is one.
+ *
+ * Returns -22 if bdev is not of type "loop" or lacks src/dest; otherwise the
+ * return value of umount(). The fd is closed whether or not umount()
+ * succeeded.
+ */
+static int loop_umount(struct bdev *bdev)
+{
+	int rc;
+
+	if (strcmp(bdev->type, "loop") != 0)
+		return -22;
+	if (bdev->src == NULL || bdev->dest == NULL)
+		return -22;
+
+	rc = umount(bdev->dest);
+	if (bdev->lofd < 0)
+		return rc;
+
+	close(bdev->lofd);
+	bdev->lofd = -1;
+	return rc;
+}
+
+/*
+ * do_loop_create: create the loop backing file and put a filesystem on it.
+ *
+ * @path:   file to create (mode 0600); creat() truncates an existing file.
+ * @size:   length of the file in bytes.
+ * @fstype: filesystem type handed to do_mkfs().
+ *
+ * Returns 0 on success, -1 on any failure. A file left behind by a failed
+ * call is not removed here.
+ */
+static int do_loop_create(const char *path, unsigned long size, const char *fstype)
+{
+	int fd;
+
+	// create the new loopback file.
+	fd = creat(path, S_IRUSR|S_IWUSR);
+	if (fd < 0) {
+		SYSERROR("Error creating new loop file %s", path);
+		return -1;
+	}
+	// Extend the empty file to exactly size bytes. Seeking to size and
+	// then writing one byte would produce a file of size + 1 bytes.
+	if (ftruncate(fd, size) < 0) {
+		SYSERROR("Error setting new loop file size");
+		close(fd);
+		return -1;
+	}
+	if (close(fd) < 0) {
+		SYSERROR("Error closing new loop file");
+		return -1;
+	}
+
+	// create an fs in the loopback file
+	if (do_mkfs(path, fstype) < 0) {
+		ERROR("Error creating filesystem type %s on %s", fstype,
+			path);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * loop_clonepaths: fill in the paths of a cloned container's loop bdev and
+ * create its backing file.
+ *
+ * No idea what the original blockdev will be called, but the copy will be
+ * called $lxcpath/$lxcname/rootdev
+ *
+ * new->src is set to "loop:$lxcpath/$cname/rootdev" and new->dest to
+ * "$lxcpath/$cname/rootfs"; both are malloc()ed here and left attached to
+ * @new even when a later step fails (presumably released together with
+ * @new - confirm against the caller).
+ *
+ * @snap:    must be 0; loop devices cannot be snapshotted.
+ * @newsize: size in bytes for the new file. If 0, the size of @orig is used
+ *           when @orig is a block type, otherwise DEFAULT_FS_SIZE.
+ * @oldname and @oldpath are not used.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int loop_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname,
+		const char *cname, const char *oldpath, const char *lxcpath, int snap,
+		unsigned long newsize)
+{
+	char fstype[100];
+	unsigned long size = newsize;
+	int len, ret;
+	char *srcdev;
+
+	if (snap) {
+		ERROR("loop devices cannot be snapshotted.");
+		return -1;
+	}
+
+	if (!orig->dest || !orig->src)
+		return -1;
+
+	// room for "<lxcpath>/<cname>/rootdev" plus the terminating NUL
+	len = strlen(lxcpath) + strlen(cname) + strlen("rootdev") + 3;
+	srcdev = alloca(len);
+	ret = snprintf(srcdev, len, "%s/%s/rootdev", lxcpath, cname);
+	if (ret < 0 || ret >= len)
+		return -1;
+
+	// the extra 5 bytes hold the "loop:" prefix
+	new->src = malloc(len + 5);
+	if (!new->src)
+		return -1;
+	ret = snprintf(new->src, len + 5, "loop:%s", srcdev);
+	if (ret < 0 || ret >= len + 5)
+		return -1;
+
+	// "rootfs" is shorter than "rootdev", so len is large enough here too
+	new->dest = malloc(len);
+	if (!new->dest)
+		return -1;
+	ret = snprintf(new->dest, len, "%s/%s/rootfs", lxcpath, cname);
+	if (ret < 0 || ret >= len)
+		return -1;
+
+	// it's tempting to say: if orig->src == loopback and !newsize, then
+	// copy the loopback file. However, we'd have to make sure to
+	// correctly keep holes! So punt for now.
+
+	if (is_blktype(orig)) {
+		if (!newsize && blk_getsize(orig, &size) < 0) {
+			ERROR("Error getting size of %s", orig->src);
+			return -1;
+		}
+		if (detect_fs(orig, fstype, 100) < 0) {
+			// Do what the message says: carry on with the default
+			// filesystem type instead of failing the clone.
+			INFO("could not find fstype for %s, using %s", orig->src,
+				DEFAULT_FSTYPE);
+			snprintf(fstype, sizeof(fstype), "%s", DEFAULT_FSTYPE);
+		}
+	} else {
+		snprintf(fstype, sizeof(fstype), "%s", DEFAULT_FSTYPE);
+		if (!newsize)
+			size = DEFAULT_FS_SIZE; // default to 1G
+	}
+	return do_loop_create(srcdev, size, fstype);
+}
+
+/*
+ * loop_create: set up a new loop-backed bdev and create its backing file.
+ *
+ * @bdev:  receives src ("loop:<backing file>") and dest (a copy of @dest),
+ *         both heap-allocated here.
+ * @dest:  mount point, expected as $lxcpath/$lxcname/rootfs. The backing
+ *         file path is derived by replacing its last two characters with
+ *         "dev", giving $lxcpath/$lxcname/rootdev.
+ * @n:     not used.
+ * @specs: supplies u.loop.fssize (0 selects DEFAULT_FS_SIZE) and
+ *         u.loop.fstype (NULL selects DEFAULT_FSTYPE); must not be NULL.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int loop_create(struct bdev *bdev, const char *dest, const char *n,
+			struct bdev_specs *specs)
+{
+	const char *fstype;
+	unsigned long sz;
+	size_t destlen;
+	int ret, len;
+	char *srcdev;
+
+	if (!specs || !dest)
+		return -1;
+
+	// The "fs" -> "dev" rewrite below overwrites the last two characters
+	// of dest, so a shorter dest would write before the buffer.
+	destlen = strlen(dest);
+	if (destlen < 2)
+		return -1;
+
+	// dest is passed in as $lxcpath / $lxcname / rootfs
+	// srcdev will be:      $lxcpath / $lxcname / rootdev
+	// src will be 'loop:$srcdev'
+	len = destlen + 2;
+	srcdev = alloca(len);
+
+	ret = snprintf(srcdev, len, "%s", dest);
+	if (ret < 0 || ret >= len)
+		return -1;
+	// "dev" plus its NUL ends exactly at the last byte of srcdev
+	snprintf(srcdev + destlen - 2, 4, "dev");
+
+	bdev->src = malloc(len + 5);
+	if (!bdev->src)
+		return -1;
+	ret = snprintf(bdev->src, len + 5, "loop:%s", srcdev);
+	if (ret < 0 || ret >= len + 5)
+		return -1;
+
+	sz = specs->u.loop.fssize;
+	if (!sz)
+		sz = DEFAULT_FS_SIZE;
+
+	fstype = specs->u.loop.fstype;
+	if (!fstype)
+		fstype = DEFAULT_FSTYPE;
+
+	if (!(bdev->dest = strdup(dest)))
+		return -1;
+
+	if (mkdir_p(bdev->dest, 0755) < 0) {
+		ERROR("Error creating %s\n", bdev->dest);
+		return -1;
+	}
+
+	return do_loop_create(srcdev, sz, fstype);
+}
+
+/*
+ * loop_destroy: remove the backing file of a loop bdev.
+ *
+ * Returns the result of unlink() on the path that follows the five-character
+ * "loop:" prefix of orig->src.
+ */
+static int loop_destroy(struct bdev *orig)
+{
+	const char *backing_file = orig->src + 5;
+
+	return unlink(backing_file);
+}
+
+// Operation table for the "loop" backing store.
+struct bdev_ops loop_ops = {
+	.detect = loop_detect,
+	.create = loop_create,
+	.destroy = loop_destroy,
+	.mount = loop_mount,
+	.umount = loop_umount,
+	.clone_paths = loop_clonepaths,
+};
+
+//
// overlayfs ops
//
@@ -1525,6 +1812,7 @@ struct bdev_type bdevs[] = {
{.name = "btrfs", .ops = &btrfs_ops,},
{.name = "dir", .ops = &dir_ops,},
{.name = "overlayfs", .ops = &overlayfs_ops,},
+ {.name = "loop", .ops = &loop_ops,},
};
static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type);
@@ -1571,6 +1859,7 @@ struct bdev *bdev_init(const char *src, const char *dst, const char *data)
if (r)
break;
}
+
if (i == numbdevs)
return NULL;
bdev = malloc(sizeof(struct bdev));
diff --git a/src/lxc/bdev.h b/src/lxc/bdev.h
index 4f27ea9..1d79bb2 100644
--- a/src/lxc/bdev.h
+++ b/src/lxc/bdev.h
@@ -24,6 +24,10 @@ struct bdev_specs {
char *fstype;
unsigned long fssize; // fs size in bytes
} lvm;
+ struct {
+ char *fstype;
+ unsigned long fssize; // fs size in bytes
+ } loop;
} u;
};
@@ -55,6 +59,9 @@ struct bdev {
char *src;
char *dest;
char *data;
+ // turn the following into a union if need be
+ // lofd is the open fd for the mounted loopback file
+ int lofd;
};
char *overlayfs_getlower(char *p);
diff --git a/src/lxc/lxc_clone.c b/src/lxc/lxc_clone.c
index 2b0ee43..b29b621 100644
--- a/src/lxc/lxc_clone.c
+++ b/src/lxc/lxc_clone.c
@@ -24,7 +24,7 @@ void usage(const char *me)
printf(" -s: snapshot rather than copy\n");
printf(" -B: use specified new backingstore. Default is the same as\n");
printf(" the original. Options include btrfs, lvm, overlayfs, \n");
- printf(" dir\n");
+ printf(" dir and loop\n");
printf(" -L: for blockdev-backed backingstore, use specified size\n");
printf(" -K: Keep name - do not change the container name\n");
printf(" -M: Keep macaddr - do not choose a random new mac address\n");
diff --git a/src/lxc/lxc_create.c b/src/lxc/lxc_create.c
index c9231ef..6d8ca01 100644
--- a/src/lxc/lxc_create.c
+++ b/src/lxc/lxc_create.c
@@ -143,11 +143,14 @@ Options :\n\
bool validate_bdev_args(struct lxc_arguments *a)
{
- if (strcmp(a->bdevtype, "lvm") != 0) {
- if (a->fstype || a->fssize) {
+ if (a->fstype || a->fssize) {
+ if (strcmp(a->bdevtype, "lvm") != 0 &&
+ strcmp(a->bdevtype, "loop") != 0) {
fprintf(stderr, "filesystem type and size are only valid with block devices\n");
return false;
}
+ }
+ if (strcmp(a->bdevtype, "lvm") != 0) {
if (a->lvname || a->vgname) {
fprintf(stderr, "--lvname and --vgname are only valid with -B lvm\n");
return false;
@@ -213,6 +216,11 @@ int main(int argc, char *argv[])
spec.u.lvm.fstype = my_args.fstype;
if (my_args.fssize)
spec.u.lvm.fssize = my_args.fssize;
+ } else if (strcmp(my_args.bdevtype, "loop") == 0) {
+ if (my_args.fstype)
+ spec.u.lvm.fstype = my_args.fstype;
+ if (my_args.fssize)
+ spec.u.lvm.fssize = my_args.fssize;
} else if (my_args.dir) {
ERROR("--dir is not yet supported");
exit(1);
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 3764923..2edf749 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -534,7 +534,7 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv
close(0);
close(1);
close(2);
- open("/dev/null", O_RDONLY);
+ open("/dev/zero", O_RDONLY);
open("/dev/null", O_RDWR);
open("/dev/null", O_RDWR);
setsid();
--
1.8.1.2
More information about the lxc-devel
mailing list