[lxc-devel] [PATCH] Add support for checkpoint and restore via CRIU

Tycho Andersen tycho.andersen at canonical.com
Wed Aug 20 03:14:03 UTC 2014


This patch adds support for checkpointing and restoring containers via CRIU.
It adds two api calls, ->checkpoint and ->restore, which are wrappers around
the CRIU CLI. CRIU has an RPC API, but reasons for preferring exec() are
discussed in [1].

To checkpoint, users specify a directory into which the container metadata
(CRIU dump files, plus some additional information about veth pairs and which
bridges they are attached to) is dumped. On restore, this
information is read out of the directory, a CRIU command line is constructed,
and CRIU is exec()d. CRIU uses the lxc-restore-net callback (which in turn
inspects the image directory with the NIC data) to properly restore the
network.

This will only work with the current git master of CRIU; anything as of
a152c843 should work. There is a known bug where containers which have been
restored cannot be checkpointed [2].

[1]: http://lists.openvz.org/pipermail/criu/2014-July/015117.html
[2]: http://lists.openvz.org/pipermail/criu/2014-August/015876.html

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 .gitignore                 |   2 +
 configure.ac               |  24 +++
 doc/Makefile.am            |   2 +
 doc/lxc-checkpoint.sgml.in | 146 ++++++++++++++
 doc/lxc-restore.sgml.in    | 158 ++++++++++++++++
 src/lxc/Makefile.am        |  13 +-
 src/lxc/lxc-restore-net    |  28 +++
 src/lxc/lxc_checkpoint.c   | 119 ++++++++++++
 src/lxc/lxc_restore.c      | 130 +++++++++++++
 src/lxc/lxccontainer.c     | 461 +++++++++++++++++++++++++++++++++++++++++++++
 src/lxc/lxccontainer.h     |  25 +++
 src/lxc/start.c            |   6 +-
 src/lxc/start.h            |   4 +
 src/lxc/utils.c            |  14 ++
 src/lxc/utils.h            |   1 +
 15 files changed, 1129 insertions(+), 4 deletions(-)
 create mode 100644 doc/lxc-checkpoint.sgml.in
 create mode 100644 doc/lxc-restore.sgml.in
 create mode 100755 src/lxc/lxc-restore-net
 create mode 100644 src/lxc/lxc_checkpoint.c
 create mode 100644 src/lxc/lxc_restore.c

diff --git a/.gitignore b/.gitignore
index e6de18f..bf9c944 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,6 +49,7 @@ src/lxc/lxc-attach
 src/lxc/lxc-autostart
 src/lxc/lxc-cgroup
 src/lxc/lxc-checkconfig
+src/lxc/lxc-checkpoint
 src/lxc/lxc-clone
 src/lxc/lxc-console
 src/lxc/lxc-config
@@ -62,6 +63,7 @@ src/lxc/lxc-init
 src/lxc/lxc-ls
 src/lxc/lxc-monitor
 src/lxc/lxc-monitord
+src/lxc/lxc-restore
 src/lxc/lxc-shutdown
 src/lxc/lxc-snapshot
 src/lxc/lxc-start
diff --git a/configure.ac b/configure.ac
index 462217e..54548bd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -261,6 +261,27 @@ AM_COND_IF([ENABLE_SECCOMP],
 		])
 	])
 
+# criu
+AC_ARG_ENABLE([criu],
+	[AC_HELP_STRING([--enable-criu], [enable checkpoint/restore support [default=auto]])],
+	[], [enable_criu=auto])
+
+if test "x$enable_criu" = "xauto" ; then
+	AC_CHECK_PROG([CRIU_CHECK], [criu], [yes], [no], "$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin")
+	if test "x$CRIU_CHECK" = "xyes" ; then
+		enable_criu=yes
+	fi
+fi
+
+AM_CONDITIONAL([ENABLE_CRIU], [test "x$enable_criu" = "xyes"])
+AC_PATH_TOOL([CRIU_PATH], [criu], [no])
+if test "x$enable_criu" = "xyes" ; then
+	if test "x$CRIU_PATH" = "xno" ; then
+		AC_MSG_ERROR([Could not find criu])
+	fi
+	AC_DEFINE_UNQUOTED([CRIU_PATH], "$CRIU_PATH", [Criu path])
+fi
+
 # cgmanager
 AC_ARG_ENABLE([cgmanager],
 	[AC_HELP_STRING([--enable-cgmanager], [enable cgmanager support [default=auto]])],
@@ -652,6 +673,7 @@ AC_CONFIG_FILES([
 	doc/lxc-autostart.sgml
 	doc/lxc-cgroup.sgml
 	doc/lxc-checkconfig.sgml
+	doc/lxc-checkpoint.sgml
 	doc/lxc-clone.sgml
 	doc/lxc-config.sgml
 	doc/lxc-console.sgml
@@ -663,6 +685,7 @@ AC_CONFIG_FILES([
 	doc/lxc-info.sgml
 	doc/lxc-ls.sgml
 	doc/lxc-monitor.sgml
+	doc/lxc-restore.sgml
 	doc/lxc-snapshot.sgml
 	doc/lxc-start-ephemeral.sgml
 	doc/lxc-start.sgml
@@ -780,6 +803,7 @@ Environment:
  - GnuTLS: $enable_gnutls
  - Bash integration: $enable_bash
  - Openvswitch: $enable_ovs
+ - CRIU: $CRIU_PATH
 
 Security features:
  - Apparmor: $enable_apparmor
diff --git a/doc/Makefile.am b/doc/Makefile.am
index bfe887e..d8098a9 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -20,6 +20,7 @@ man_MANS = \
 	lxc-autostart.1 \
 	lxc-cgroup.1 \
 	lxc-checkconfig.1 \
+	lxc-checkpoint.1 \
 	lxc-clone.1 \
 	lxc-config.1 \
 	lxc-console.1 \
@@ -29,6 +30,7 @@ man_MANS = \
 	lxc-freeze.1 \
 	lxc-info.1 \
 	lxc-monitor.1 \
+	lxc-restore.1 \
 	lxc-snapshot.1 \
 	lxc-start.1 \
 	lxc-stop.1 \
diff --git a/doc/lxc-checkpoint.sgml.in b/doc/lxc-checkpoint.sgml.in
new file mode 100644
index 0000000..fa59748
--- /dev/null
+++ b/doc/lxc-checkpoint.sgml.in
@@ -0,0 +1,146 @@
+<!--
+
+(C) Copyright Canonical Ltd. 2014
+
+Authors:
+Tycho Andersen <tycho.andersen at canonical.com>
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+-->
+
+<!DOCTYPE refentry PUBLIC @docdtd@ [
+
+<!ENTITY commonoptions SYSTEM "@builddir@/common_options.sgml">
+<!ENTITY seealso SYSTEM "@builddir@/see_also.sgml">
+]>
+
+<refentry>
+
+  <docinfo><date>@LXC_GENERATE_DATE@</date></docinfo>
+
+  <refmeta>
+    <refentrytitle>lxc-checkpoint</refentrytitle>
+    <manvolnum>1</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>lxc-checkpoint</refname>
+
+    <refpurpose>
+      checkpoint a container
+    </refpurpose>
+  </refnamediv>
+
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>lxc-checkpoint</command>
+      <arg choice="req">-n <replaceable>name</replaceable></arg>
+      <arg choice="req">-D <replaceable>checkpoint-dir</replaceable></arg>
+      <arg choice="opt">-s <replaceable>stop</replaceable></arg>
+      <arg choice="opt">-v <replaceable>verbose</replaceable></arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>lxc-checkpoint</command> checkpoints a container for later use
+      by <command>lxc-restore</command>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+
+      <varlistentry>
+        <term>
+          <option>-D <replaceable>checkpoint-dir</replaceable></option>
+        </term>
+        <listitem>
+          <para>
+	    The directory to dump the checkpoint metadata.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option><optional>-s</optional></option>
+        </term>
+        <listitem>
+          <para>
+	    Optionally stop the container after dumping.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option><optional>-v</optional></option>
+        </term>
+        <listitem>
+          <para>
+	    Enable verbose criu logging.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  &commonoptions;
+
+  <refsect1>
+    <title>Examples</title>
+    <variablelist>
+
+      <varlistentry>
+        <term>lxc-checkpoint -n foo -D /tmp/checkpoint</term>
+        <listitem>
+          <para>
+	    Checkpoint the container foo into the directory /tmp/checkpoint.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  &seealso;
+
+  <refsect1>
+    <title>Author</title>
+    <para>Tycho Andersen <email>tycho.andersen at canonical.com</email></para>
+  </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:t
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:2
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:nil
+sgml-exposed-tags:nil
+sgml-local-catalogs:nil
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/lxc-restore.sgml.in b/doc/lxc-restore.sgml.in
new file mode 100644
index 0000000..0320c72
--- /dev/null
+++ b/doc/lxc-restore.sgml.in
@@ -0,0 +1,158 @@
+<!--
+
+(C) Copyright Canonical Ltd. 2014
+
+Authors:
+Tycho Andersen <tycho.andersen at canonical.com>
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+-->
+
+<!DOCTYPE refentry PUBLIC @docdtd@ [
+
+<!ENTITY commonoptions SYSTEM "@builddir@/common_options.sgml">
+<!ENTITY seealso SYSTEM "@builddir@/see_also.sgml">
+]>
+
+<refentry>
+
+  <docinfo><date>@LXC_GENERATE_DATE@</date></docinfo>
+
+  <refmeta>
+    <refentrytitle>lxc-restore</refentrytitle>
+    <manvolnum>1</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>lxc-restore</refname>
+
+    <refpurpose>
+      restore a container
+    </refpurpose>
+  </refnamediv>
+
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>lxc-restore</command>
+      <arg choice="req">-n <replaceable>name</replaceable></arg>
+      <arg choice="req">-D <replaceable>restore-dir</replaceable></arg>
+      <arg choice="opt">-d <replaceable>daemon</replaceable></arg>
+      <arg choice="opt">-F <replaceable>foreground</replaceable></arg>
+      <arg choice="opt">-v <replaceable>verbose</replaceable></arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>lxc-restore</command> restores a container from a checkpoint
+      previously created by <command>lxc-checkpoint</command>.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+
+      <varlistentry>
+        <term>
+          <option>-D <replaceable>restore-dir</replaceable></option>
+        </term>
+        <listitem>
+          <para>
+	    The directory to read the checkpoint metadata from.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option><optional>-d</optional></option>
+        </term>
+        <listitem>
+          <para>
+            Restore the container in the background (this is the default).
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option><optional>-F</optional></option>
+        </term>
+        <listitem>
+          <para>
+            Restore the container in the foreground.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option><optional>-v</optional></option>
+        </term>
+        <listitem>
+          <para>
+	    Enable verbose criu logging.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  &commonoptions;
+
+  <refsect1>
+    <title>Examples</title>
+    <variablelist>
+
+      <varlistentry>
+        <term>lxc-restore -n foo -D /tmp/restore</term>
+        <listitem>
+          <para>
+	    Restore the container foo from the checkpoint in the directory /tmp/restore.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  &seealso;
+
+  <refsect1>
+    <title>Author</title>
+    <para>Tycho Andersen <email>tycho.andersen at canonical.com</email></para>
+  </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:t
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:2
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:nil
+sgml-exposed-tags:nil
+sgml-local-catalogs:nil
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
index f7bc31a..396b2ce 100644
--- a/src/lxc/Makefile.am
+++ b/src/lxc/Makefile.am
@@ -166,6 +166,11 @@ endif
 
 bin_SCRIPTS = lxc-checkconfig
 
+if ENABLE_CRIU
+liblxc_so_LDADD += $(CRIU_LIBS)
+liblxc_so_CFLAGS += $(CRIU_CFLAGS)
+endif
+
 EXTRA_DIST = \
 	lxc-device \
 	lxc-ls \
@@ -203,12 +208,16 @@ bin_PROGRAMS = \
 	lxc-unfreeze \
 	lxc-unshare \
 	lxc-usernsexec \
-	lxc-wait
+	lxc-wait \
+	lxc-checkpoint \
+	lxc-restore
 
 sbin_PROGRAMS = init.lxc
 pkglibexec_PROGRAMS = \
 	lxc-monitord \
 	lxc-user-nic
+pkglibexec_SCRIPTS = \
+	lxc-restore-net
 
 AM_LDFLAGS = -Wl,-E
 if ENABLE_RPATH
@@ -238,6 +247,8 @@ lxc_create_SOURCES = lxc_create.c
 lxc_snapshot_SOURCES = lxc_snapshot.c
 lxc_usernsexec_SOURCES = lxc_usernsexec.c
 lxc_user_nic_SOURCES = lxc_user_nic.c network.c network.h
+lxc_checkpoint_SOURCES = lxc_checkpoint.c
+lxc_restore_SOURCES = lxc_restore.c
 
 if HAVE_STATIC_LIBCAP
 sbin_PROGRAMS += init.lxc.static
diff --git a/src/lxc/lxc-restore-net b/src/lxc/lxc-restore-net
new file mode 100755
index 0000000..15a4aee
--- /dev/null
+++ b/src/lxc/lxc-restore-net
@@ -0,0 +1,28 @@
+#!/bin/bash
+# CRIU action script: detach/reattach the container's veths from/to their bridges.
+[ -z "$CRTOOLS_IMAGE_DIR" ] && exit 1
+
+set -e
+set -o pipefail
+
+dir="$CRTOOLS_IMAGE_DIR"
+
+[[ "network-unlock" == "$CRTOOLS_SCRIPT_ACTION" ||
+   "network-lock" == "$CRTOOLS_SCRIPT_ACTION" ]] || exit 0
+
+i=0
+while [ -f "$dir/eth$i" ] && [ -f "$dir/veth$i" ] && [ -f "$dir/bridge$i" ]; do
+	veth=$(cat "$dir/veth$i")
+	bridge=$(cat "$dir/bridge$i")
+
+	if [ "$CRTOOLS_SCRIPT_ACTION" == "network-lock" ]; then
+		brctl delif "$bridge" "$veth"
+	fi
+
+	if [ "$CRTOOLS_SCRIPT_ACTION" == "network-unlock" ]; then
+		brctl addif "$bridge" "$veth"
+		ifconfig "$veth" 0.0.0.0 up
+	fi
+
+	i=$((i+1))
+done
diff --git a/src/lxc/lxc_checkpoint.c b/src/lxc/lxc_checkpoint.c
new file mode 100644
index 0000000..1f15bbe
--- /dev/null
+++ b/src/lxc/lxc_checkpoint.c
@@ -0,0 +1,119 @@
+/*
+ *
+ * Copyright © 2014 Tycho Andersen <tycho.andersen at canonical.com>.
+ * Copyright © 2014 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+
+#include <lxc/lxccontainer.h>
+
+#include "log.h"
+#include "config.h"
+#include "lxc.h"
+#include "arguments.h"
+
+static char *checkpoint_dir;
+static bool stop = false;
+static bool verbose = false;
+
+static const struct option my_longopts[] = {
+	{"checkpoint-dir", required_argument, 0, 'D'},
+	{"stop", no_argument, 0, 's'},
+	{"verbose", no_argument, 0, 'v'},
+	LXC_COMMON_OPTIONS
+};
+
+/* Option callback (my_args.parser): records -D/-s/-v; 0 on success, -1 on OOM. */
+static int my_parser(struct lxc_arguments *args, int c, char *arg)
+{
+	switch (c) {
+	case 'D':
+		checkpoint_dir = strdup(arg);
+		if (!checkpoint_dir)
+			return -1;
+		break;
+	/* Each flag breaks explicitly so that -s does not also turn on -v. */
+	case 's': stop = true; break;
+	case 'v': verbose = true; break;
+	}
+	return 0;
+}
+
+static struct lxc_arguments my_args = {
+	.progname = "lxc-checkpoint",
+	.help     = "\
+--name=NAME\n\
+\n\
+lxc-checkpoint checkpoints a container\n\
+\n\
+Options :\n\
+  -n, --name=NAME           NAME for name of the container\n\
+  -D, --checkpoint-dir=DIR  directory to save the checkpoint in\n\
+  -s, --stop                Stop the container after checkpointing.\n\
+  -v, --verbose             Enable verbose criu logs\n\
+",
+	.options  = my_longopts,
+	.parser   = my_parser,
+	.checker  = NULL,
+};
+
+int main(int argc, char *argv[])
+{
+	struct lxc_container *c;
+	int ret;
+
+	if (lxc_arguments_parse(&my_args, argc, argv))
+		exit(1);
+
+	c = lxc_container_new(my_args.name, my_args.lxcpath[0]);
+	if (!c) {
+		fprintf(stderr, "System error loading %s\n", my_args.name);
+		exit(1);
+	}
+
+	if (!c->may_control(c)) {
+		fprintf(stderr, "Insufficent privileges to control %s\n", my_args.name);
+		lxc_container_put(c);
+		exit(1);
+	}
+
+	if (!c->is_defined(c)) {
+		fprintf(stderr, "%s is not defined\n", my_args.name);
+		lxc_container_put(c);
+		exit(1);
+	}
+
+
+	if (!c->is_running(c)) {
+		fprintf(stderr, "%s not running, not checkpointing.\n", my_args.name);
+		lxc_container_put(c);
+		exit(1);
+	}
+
+	ret = c->checkpoint(c, checkpoint_dir, stop, verbose);
+	lxc_container_put(c);
+
+	if (ret < 0) {
+		fprintf(stderr, "Checkpointing %s failed.\n", my_args.name);
+		if (ret == -ENOSYS)
+			fprintf(stderr, "CRIU was not enabled at compile time.\n");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/src/lxc/lxc_restore.c b/src/lxc/lxc_restore.c
new file mode 100644
index 0000000..c21ed10
--- /dev/null
+++ b/src/lxc/lxc_restore.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * Copyright © 2014 Tycho Andersen <tycho.andersen at canonical.com>.
+ * Copyright © 2014 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <lxc/lxccontainer.h>
+
+#include "log.h"
+#include "config.h"
+#include "lxc.h"
+#include "arguments.h"
+
+static char *checkpoint_dir;
+static bool verbose = false;
+
+static const struct option my_longopts[] = {
+	{"checkpoint-dir", required_argument, 0, 'D'},
+	{"daemon", no_argument, 0, 'd'},
+	{"foreground", no_argument, 0, 'F'},
+	{"verbose", no_argument, 0, 'v'},
+	LXC_COMMON_OPTIONS
+};
+
+static int my_parser(struct lxc_arguments *args, int c, char *arg)
+{
+	switch (c) {
+	case 'D':
+		checkpoint_dir = strdup(arg);
+		if (!checkpoint_dir)
+			return -1;
+		break;
+	case 'd': args->daemonize = 1; break;
+	case 'F': args->daemonize = 0; break;
+	case 'v': verbose = true; break;
+	}
+	return 0;
+}
+
+static struct lxc_arguments my_args = {
+	.progname  = "lxc-restore",
+	.help      = "\
+--name=NAME\n\
+\n\
+lxc-restore restores a container from a checkpoint\n\
+\n\
+Options :\n\
+  -n, --name=NAME           NAME for name of the container\n\
+  -d, --daemon              Daemonize the container (default)\n\
+  -F, --foreground          Start with the current tty attached to /dev/console\n\
+  -D, --checkpoint-dir=DIR  directory of the saved checkpoint\n\
+  -v, --verbose             Enable verbose criu logs\n\
+",
+	.options   = my_longopts,
+	.parser    = my_parser,
+	.daemonize = 1,
+	.checker   = NULL,
+};
+
+/* lxc-restore: check that the container can be restored, then restore it. */
+int main(int argc, char *argv[])
+{
+	struct lxc_container *c;
+	pid_t pid = 0;
+
+	if (lxc_arguments_parse(&my_args, argc, argv))
+		exit(1);
+
+	c = lxc_container_new(my_args.name, my_args.lxcpath[0]);
+	if (!c) {
+		fprintf(stderr, "System error loading %s\n", my_args.name);
+		exit(1);
+	}
+
+	if (!c->may_control(c)) {
+		fprintf(stderr, "Insufficent privileges to control %s\n", my_args.name);
+		lxc_container_put(c);
+		exit(1);
+	}
+
+	if (!c->is_defined(c)) {
+		fprintf(stderr, "%s is not defined\n", my_args.name);
+		lxc_container_put(c);
+		exit(1);
+	}
+
+	if (c->is_running(c)) {
+		fprintf(stderr, "%s is running, not restoring.\n", my_args.name);
+		lxc_container_put(c);
+		exit(1);
+	}
+
+	/* Daemon mode: a child does the restore; the parent returns right away. */
+	if (my_args.daemonize)
+		pid = fork();
+	if (pid < 0) {
+		perror("fork");
+		lxc_container_put(c);
+		exit(1);
+	}
+	if (pid == 0) {
+		int ret = c->restore(c, checkpoint_dir, verbose);
+		lxc_container_put(c);
+		if (ret < 0) {
+			fprintf(stderr, "Restoring %s failed.\n", my_args.name);
+			if (ret == -ENOSYS)
+				fprintf(stderr, "CRIU was not enabled at compile time.\n");
+			return 1;
+		}
+	} else
+		lxc_container_put(c);
+	return 0;
+}
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 0cf21ce..43d9e2d 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -55,6 +55,7 @@
 #include "monitor.h"
 #include "namespace.h"
 #include "lxclock.h"
+#include "sync.h"
 
 #if HAVE_IFADDRS_H
 #include <ifaddrs.h>
@@ -62,6 +63,10 @@
 #include <../include/ifaddrs.h>
 #endif
 
+#ifdef CRIU_PATH
+#include <criu/criu.h>
+#endif
+
 #define MAX_BUFFER 4096
 
 #define NOT_SUPPORTED_ERROR "the requested function %s is not currently supported with unprivileged containers"
@@ -3476,6 +3481,460 @@ static bool lxcapi_remove_device_node(struct lxc_container *c, const char *src_p
 	return add_remove_device_node(c, src_path, dest_path, false);
 }
 
+#ifdef CRIU_PATH
+struct criu_opts {
+	/* The type of criu invocation, one of "dump" or "restore" */
+	char *action;
+
+	/* The directory to pass to criu */
+	char *directory;
+
+	/* The container to dump */
+	struct lxc_container *c;
+
+	/* Enable criu verbose mode? */
+	bool verbose;
+
+	/* dump: stop the container or not after dumping? */
+	bool stop;
+
+	/* restore: the file to write the init process' pid into */
+	char *pidfile;
+};
+
+/*
+ * @out must be 128 bytes long
+ */
+static int read_criu_file(const char *directory, const char *file, int netnr, char *out)
+{
+	char path[PATH_MAX];
+	int ret;
+	FILE *f;
+
+	ret = snprintf(path, PATH_MAX,  "%s/%s%d", directory, file, netnr);
+	if (ret < 0 || ret >= PATH_MAX) {
+		ERROR("%s: path too long", __func__);
+		return -1;
+	}
+
+	f = fopen(path, "r");
+	if (!f)
+		return -1;
+
+	ret = fscanf(f, "%127s", out);
+	fclose(f);
+	if (ret <= 0)
+		return -1;
+
+	return 0;
+}
+
+static void exec_criu(struct criu_opts *opts)
+{
+	char **argv, log[PATH_MAX];
+	int static_args = 12, argc = 0, i, ret;
+
+	/* The command line always looks like:
+	 * criu $(action) --tcp-established --file-locks --manage-cgroups \
+	 *     --action-script foo.sh -D $(directory) -o $(directory)/$(action).log
+	 * +1 for final NULL */
+
+	if (strcmp(opts->action, "dump") == 0) {
+		/* -t pid */
+		static_args += 2;
+
+		/* --leave-running */
+		if (!opts->stop)
+			static_args++;
+	} else if (strcmp(opts->action, "restore") == 0) {
+		/* --root $(lxc_mount_point) --restore-detached --pidfile $foo */
+		static_args += 5;
+	} else {
+		return;
+	}
+
+	if (opts->verbose)
+		static_args++;
+
+	ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->directory, opts->action);
+	if (ret < 0 || ret >= PATH_MAX) {
+		ERROR("logfile name too long\n");
+		return;
+	}
+
+	argv = malloc(static_args * sizeof(*argv));
+	if (!argv)
+		return;
+
+	memset(argv, 0, static_args * sizeof(*argv));
+
+#define DECLARE_ARG(arg) 			\
+	do {					\
+		argv[argc++] = strdup(arg);	\
+		if (!argv[argc-1])		\
+			goto err;		\
+	} while (0)
+
+	DECLARE_ARG(CRIU_PATH);
+	DECLARE_ARG(opts->action);
+	DECLARE_ARG("--tcp-established");
+	DECLARE_ARG("--file-locks");
+	DECLARE_ARG("--manage-cgroups");
+	DECLARE_ARG("--action-script");
+	DECLARE_ARG(LIBEXECDIR "/lxc/lxc-restore-net");
+	DECLARE_ARG("-D");
+	DECLARE_ARG(opts->directory);
+	DECLARE_ARG("-o");
+	DECLARE_ARG(log);
+
+	if (opts->verbose)
+		DECLARE_ARG("-vvvvvv");
+
+	if (strcmp(opts->action, "dump") == 0) {
+		char pid[32];
+
+		if (sprintf(pid, "%ld", (long) opts->c->init_pid(opts->c)) < 0)
+			goto err;
+
+		DECLARE_ARG("-t");
+		DECLARE_ARG(pid);
+		if (!opts->stop)
+			DECLARE_ARG("--leave-running");
+	} else if (strcmp(opts->action, "restore") == 0) {
+		int netnr = 0;
+		struct lxc_list *it;
+
+		DECLARE_ARG("--root");
+		DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
+		DECLARE_ARG("--restore-detached");
+		DECLARE_ARG("--pidfile");
+		DECLARE_ARG(opts->pidfile);
+
+		lxc_list_for_each(it, &opts->c->lxc_conf->network) {
+			char eth[128], veth[128], buf[257];
+			void *m;
+
+			if (read_criu_file(opts->directory, "veth", netnr, veth))
+				goto err;
+			if (read_criu_file(opts->directory, "eth", netnr, eth))
+				goto err;
+			ret = snprintf(buf, 257, "%s=%s", eth, veth);
+			if (ret < 0 || ret >= 257)
+				goto err;
+
+			/* final NULL and --veth-pair eth0:vethASDF */
+			m = realloc(argv, (argc + 1 + 2) * sizeof(*argv));
+			if (!m)
+				goto err;
+			argv = m;
+
+			DECLARE_ARG("--veth-pair");
+			DECLARE_ARG(buf);
+			argv[argc] = NULL;
+
+			netnr++;
+		}
+	}
+
+#undef DECLARE_ARG
+
+	execv(argv[0], argv);
+err:
+	for (i = 0; argv[i]; i++)
+		free(argv[i]);
+	free(argv);
+}
+
+/* Check that the container has a configuration we know CRIU can dump;
+ * returns true if so, false otherwise (including when not run as root). */
+static bool criu_ok(struct lxc_container *c)
+{
+	struct lxc_list *it;
+	bool found_deny_rule = false;
+
+	if (geteuid()) {
+		ERROR("Must be root to checkpoint\n");
+		return false;	/* was -1, which is true once converted to bool */
+	}
+
+	/* We only know how to restore containers with veth networks. */
+	lxc_list_for_each(it, &c->lxc_conf->network) {
+		struct lxc_netdev *n = it->elem;
+		if (n->type != LXC_NET_VETH && n->type != LXC_NET_NONE)
+			return false;
+	}
+
+	// These requirements come from http://criu.org/LXC
+	if (!c->lxc_conf->console.path ||	/* unset is not "none" */
+	    strcmp(c->lxc_conf->console.path, "none") != 0)
+		return false;
+
+	if (c->lxc_conf->tty != 0)
+		return false;
+
+	lxc_list_for_each(it, &c->lxc_conf->cgroup) {
+		struct lxc_cgroup *cg = it->elem;
+		if (strcmp(cg->subsystem, "devices.deny") == 0 &&
+				strcmp(cg->value, "c 5:1 rwm") == 0) {
+			found_deny_rule = true;
+			break;
+		}
+	}
+
+	if (!found_deny_rule)
+		return false;
+
+	return true;
+}
+#endif
+
+/* Dump container @c into @directory using CRIU. For each NIC with a veth
+ * pair, veth<N>, bridge<N> and eth<N> files are written there; then criu is
+ * forked and waited for (with --leave-running unless @stop). Returns 0 on
+ * success and <0 on failure; -ENOSYS when built without CRIU. */
+static int lxcapi_checkpoint(struct lxc_container *c, char *directory, bool stop, bool verbose)
+{
+#ifdef CRIU_PATH
+	int netnr = 0, ret = 0, status;
+	struct lxc_list *it;
+	pid_t pid;
+
+	if (!criu_ok(c))
+		return -1;
+
+	if (mkdir(directory, 0700) < 0 && errno != EEXIST)
+		return -1;
+
+	lxc_list_for_each(it, &c->lxc_conf->network) {
+		char *veth = NULL, *bridge = NULL, veth_path[PATH_MAX], eth[128];
+		struct lxc_netdev *n = it->elem;
+		int pret;
+
+		pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.veth.pair", netnr);
+		if (pret < 0 || pret >= PATH_MAX) {
+			ret = -1;
+			goto out;
+		}
+
+		veth = c->get_running_config_item(c, veth_path);
+		if (!veth) {
+			/* criu_ok() checks that all interfaces are
+			 * LXC_NET{VETH,NONE}, and VETHs should have this
+			 * config */
+			assert(n->type == LXC_NET_NONE);
+			break;
+		}
+
+		pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.link", netnr);
+		if (pret < 0 || pret >= PATH_MAX) {
+			ret = -1;
+			goto out;
+		}
+
+		bridge = c->get_running_config_item(c, veth_path);
+		if (!bridge) {
+			ret = -1;
+			goto out;
+		}
+
+		pret = snprintf(veth_path, PATH_MAX, "%s/veth%d", directory, netnr);
+		if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, veth) < 0) {
+			ret = -1;
+			goto out;
+		}
+
+		pret = snprintf(veth_path, PATH_MAX, "%s/bridge%d", directory, netnr);
+		if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, bridge) < 0) {
+			ret = -1;
+			goto out;
+		}
+
+		/* snprintf() NUL-terminates even when n->name fills the buffer */
+		if (n->name)
+			snprintf(eth, sizeof(eth), "%s", n->name);
+		else
+			snprintf(eth, sizeof(eth), "eth%d", netnr);
+
+		pret = snprintf(veth_path, PATH_MAX, "%s/eth%d", directory, netnr);
+		if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, eth) < 0)
+			ret = -1;
+
+out:
+		free(veth);
+		free(bridge);
+		if (ret)
+			return ret;
+		netnr++;	/* next NIC gets its own veth<N>/bridge<N>/eth<N> */
+	}
+
+	pid = fork();
+	if (pid < 0)
+		return -1;
+
+	if (pid == 0) {
+		struct criu_opts os = {
+			.action = "dump", .directory = directory, .c = c,
+			.stop = stop, .verbose = verbose,
+		};
+
+		/* exec_criu() returning is an error */
+		exec_criu(&os);
+		exit(1);
+	} else {
+		pid_t w = waitpid(pid, &status, 0);
+		if (w == -1) {
+			perror("waitpid");
+			return -1;
+		}
+		/* criu's exit status, negated; anything but a normal exit is -1 */
+		if (WIFEXITED(status))
+			return -WEXITSTATUS(status);
+		return -1;
+	}
+#else
+	return -ENOSYS;
+#endif
+}
+
+static int lxcapi_restore(struct lxc_container *c, char *directory, bool verbose)
+{
+#ifdef CRIU_PATH
+	pid_t pid;
+	struct lxc_list *it;
+	struct lxc_rootfs *rootfs;
+	char pidfile[L_tmpnam];
+
+	if (!criu_ok(c))
+		return -1;
+
+	if (geteuid()) {
+		ERROR("Must be root to restore\n");
+		return -1;
+	}
+
+	if (!tmpnam(pidfile))
+		return -1;
+
+	struct lxc_handler *handler;
+
+	handler = lxc_init(c->name, c->lxc_conf, c->config_path);
+	if (!handler)
+		return -1;
+
+	if (unshare(CLONE_NEWNS))
+		return -1;
+
+	/* CRIU needs the lxc root bind mounted so that it is the root of some
+	 * mount. */
+	rootfs = &c->lxc_conf->rootfs;
+
+	if (rootfs_is_blockdev(c->lxc_conf)) {
+		if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
+			return -1;
+	}
+	else {
+		if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
+			return -1;
+
+		if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
+			rmdir(rootfs->mount);
+			return -1;
+		}
+	}
+
+	pid = fork();
+	if (pid < 0)
+		return -1;
+
+	if (pid == 0) {
+		struct criu_opts os;
+
+		os.action = "restore";
+		os.directory = directory;
+		os.c = c;
+		os.pidfile = pidfile;
+		os.verbose = verbose;
+
+		/* exec_criu() returning is an error */
+		exec_criu(&os);
+		umount(rootfs->mount);
+		rmdir(rootfs->mount);
+		exit(1);
+	} else {
+		int status;
+		pid_t w = waitpid(pid, &status, 0);
+
+		if (w == -1) {
+			perror("waitpid");
+			return -1;
+		}
+
+		if (WIFEXITED(status)) {
+			if (WEXITSTATUS(status)) {
+				return -1;
+			}
+			else {
+				int netnr = 0, ret;
+				FILE *f = fopen(pidfile, "r");
+				if (!f) {
+					perror("reading pidfile");
+					ERROR("couldn't read restore's init pidfile %s\n", pidfile);
+					return -1;
+				}
+
+				ret = fscanf(f, "%d", (int*) &handler->pid);
+				fclose(f);
+				if (ret != 1) {
+					ERROR("reading restore pid failed");
+					return -1;
+				}
+
+				if (container_mem_lock(c))
+					return -1;
+
+				ret = 0;
+				lxc_list_for_each(it, &c->lxc_conf->network) {
+					char eth[128], veth[128];
+					struct lxc_netdev *netdev = it->elem;
+
+					if (read_criu_file(directory, "veth", netnr, veth)) {
+						ret = -1;
+						goto out_unlock;
+					}
+					if (read_criu_file(directory, "eth", netnr, eth)) {
+						ret = -1;
+						goto out_unlock;
+					}
+					netdev->priv.veth_attr.pair = strdup(veth);
+					if (!netdev->priv.veth_attr.pair) {
+						ret = -1;
+						goto out_unlock;
+					}
+					netnr++;
+				}
+out_unlock:
+				container_mem_unlock(c);
+				if (ret)
+					return ret;
+
+				if (lxc_set_state(c->name, handler, RUNNING))
+					return -1;
+			}
+		}
+
+		if (lxc_poll(c->name, handler)) {
+			lxc_abort(c->name, handler);
+			return -1;
+		}
+	}
+
+	return 0;
+
+#else
+	return -ENOSYS;
+#endif
+}
+
 static int lxcapi_attach_run_waitl(struct lxc_container *c, lxc_attach_options_t *options, const char *program, const char *arg, ...)
 {
 	va_list ap;
@@ -3608,6 +4067,8 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
 	c->may_control = lxcapi_may_control;
 	c->add_device_node = lxcapi_add_device_node;
 	c->remove_device_node = lxcapi_remove_device_node;
+	c->checkpoint = lxcapi_checkpoint;
+	c->restore = lxcapi_restore;
 
 	/* we'll allow the caller to update these later */
 	if (lxc_log_init(NULL, "none", NULL, "lxc_container", 0, c->config_path)) {
diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h
index 5085c43..899354d 100644
--- a/src/lxc/lxccontainer.h
+++ b/src/lxc/lxccontainer.h
@@ -760,6 +760,31 @@ struct lxc_container {
 	 * \return \c true on success, else \c false.
 	 */
 	bool (*remove_device_node)(struct lxc_container *c, const char *src_path, const char *dest_path);
+
+	/*!
+	 * \brief Checkpoint a container.
+	 *
+	 * \param c Container.
+	 * \param directory The directory to dump the container to.
+	 * \param stop Whether or not to stop the container after checkpointing.
+	 * \param verbose Enable criu's verbose logs.
+	 *
+	 * \return \c 0 on success, \c <0 on failure (-ENOSYS if criu wasn't
+	 * present at compile time).
+	 */
+	int (*checkpoint)(struct lxc_container *c, char *directory, bool stop, bool verbose);
+
+	/*!
+	 * \brief Restore a container from a checkpoint.
+	 *
+	 * \param c Container.
+	 * \param directory The directory to restore the container from.
+	 * \param verbose Enable criu's verbose logs.
+	 *
+	 * \return \c 0 on success \c <0 on failure (-ENOSYS if criu wasn't
+	 * present at compile time).
+	 */
+	int (*restore)(struct lxc_container *c, char *directory, bool verbose);
 };
 
 /*!
diff --git a/src/lxc/start.c b/src/lxc/start.c
index f282b93..98849e1 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -300,14 +300,14 @@ static int signal_handler(int fd, uint32_t events, void *data,
 	return 1;
 }
 
-static int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
+int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
 {
 	handler->state = state;
 	lxc_monitor_send_state(name, state, handler->lxcpath);
 	return 0;
 }
 
-static int lxc_poll(const char *name, struct lxc_handler *handler)
+int lxc_poll(const char *name, struct lxc_handler *handler)
 {
 	int sigfd = handler->sigfd;
 	int pid = handler->pid;
@@ -485,7 +485,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
 	free(handler);
 }
 
-static void lxc_abort(const char *name, struct lxc_handler *handler)
+void lxc_abort(const char *name, struct lxc_handler *handler)
 {
 	int ret, status;
 
diff --git a/src/lxc/start.h b/src/lxc/start.h
index ca7891c..8af0a06 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -74,6 +74,10 @@ struct lxc_handler {
 	void *cgroup_data;
 };
 
+
+extern int lxc_poll(const char *name, struct lxc_handler *handler);
+extern int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state);
+extern void lxc_abort(const char *name, struct lxc_handler *handler);
 extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
 
 extern int lxc_check_inherited(struct lxc_conf *conf, int fd_to_ignore);
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index a32829d..ed34706 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -1446,3 +1446,17 @@ out1:
 	free(retv);
 	return NULL;
 }
+
+/* Write @content to @file (truncating it); 0 on success, -1 on any I/O error. */
+int print_to_file(const char *file, const char *content)
+{
+	int ret;
+	FILE *f = fopen(file, "w");
+
+	if (!f)
+		return -1;
+	ret = fputs(content, f) == EOF ? -1 : 0;
+	if (fclose(f) == EOF)	/* buffered data is flushed here and may fail */
+		ret = -1;
+	return ret;
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index a84b489..cdfe56a 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -282,3 +282,4 @@ int detect_ramfs_rootfs(void);
 char *on_path(char *cmd, const char *rootfs);
 bool file_exists(const char *f);
 char *choose_init(const char *rootfs);
+int print_to_file(const char *file, const char *content);
-- 
1.9.1



More information about the lxc-devel mailing list