[lxc-devel] [lxc/master] network: Static routes for IPVLAN with L2PROXY

tomponline on Github lxc-bot at linuxcontainers.org
Wed May 1 15:55:24 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 453 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190501/f6e48807/attachment-0001.bin>
-------------- next part --------------
From ef4656b9e8a0c05bcf0ea30a959f28a3506d7125 Mon Sep 17 00:00:00 2001
From: tomponline <thomas.parrott at canonical.com>
Date: Tue, 30 Apr 2019 14:25:27 +0100
Subject: [PATCH 1/3] network: Adds layer 2 (ARP/NDP) proxy mode

Adds the lxc.net.[i].l2proxy flag that can be either 0 or 1.

Defaults to 0.

This, when used with lxc.net.[i].link, will add IP neighbour proxy entries on the linked device
for any IPv4 and IPv6 addresses on the container's network device.

Additionally, for IPv6 addresses it will check the following sysctl values and fail with an error if not set:

	net.ipv6.conf.[link].proxy_ndp=1
	net.ipv6.conf.[link].forwarding=1

Signed-off-by: tomponline <thomas.parrott at canonical.com>
---
 doc/api-extensions.md          |  13 +++
 doc/lxc.container.conf.sgml.in |  16 +++
 src/lxc/api_extensions.h       |   1 +
 src/lxc/confile.c              |  49 ++++++++
 src/lxc/confile_utils.c        |   4 +
 src/lxc/file_utils.c           |  17 ++-
 src/lxc/file_utils.h           |   2 +
 src/lxc/network.c              | 200 ++++++++++++++++++++++++++++++++-
 src/lxc/network.h              |   1 +
 9 files changed, 301 insertions(+), 2 deletions(-)

diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index 8c95021ada..c301aadd76 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -51,3 +51,16 @@ The caller can read this message, inspect the syscalls including its arguments.
 This introduces the `lxc.net.[i].veth.ipv4.route` and `lxc.net.[i].veth.ipv6.route` properties
 on `veth` type network interfaces. This allows adding static routes on host to the container's
 network interface.
+
+## network\_l2proxy
+
+This introduces the `lxc.net.[i].l2proxy` property, which can be either `0` or `1`. Defaults to `0`.
+This, when used with `lxc.net.[i].link`, will add IP neighbour proxy entries on the linked device
+for any IPv4 and IPv6 addresses on the container's network device.
+
+Additionally, for IPv6 addresses it will check the following sysctl values and fail with an error if not set:
+
+```
+net.ipv6.conf.[link].proxy_ndp=1
+net.ipv6.conf.[link].forwarding=1
+```
diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
index 3b3dd6ddeb..77157ca78e 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -543,6 +543,22 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
           </listitem>
         </varlistentry>
 
+        <varlistentry>
+          <term>
+            <option>lxc.net.[i].l2proxy</option>
+          </term>
+          <listitem>
+            <para>
+              Controls whether layer 2 IP neighbour proxy entries will be added to the
+              lxc.net.[i].link interface for the IP addresses of the container.
+              Can be set to 0 or 1. Defaults to 0.
+              When used with IPv6 addresses, the following sysctl values need to be set:
+              net.ipv6.conf.[link].proxy_ndp=1
+              net.ipv6.conf.[link].forwarding=1
+              </para>
+          </listitem>
+        </varlistentry>
+
         <varlistentry>
           <term>
             <option>lxc.net.[i].mtu</option>
diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h
index 529f19863e..ce34cd5af1 100644
--- a/src/lxc/api_extensions.h
+++ b/src/lxc/api_extensions.h
@@ -45,6 +45,7 @@ static char *api_extensions[] = {
 	"seccomp_allow_nesting",
 	"seccomp_notify",
 	"network_veth_routes",
+	"network_l2proxy",
 };
 
 static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index ebed11522f..398497bd7f 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -129,6 +129,7 @@ lxc_config_define(net_ipv4_gateway);
 lxc_config_define(net_ipv6_address);
 lxc_config_define(net_ipv6_gateway);
 lxc_config_define(net_link);
+lxc_config_define(net_l2proxy);
 lxc_config_define(net_macvlan_mode);
 lxc_config_define(net_mtu);
 lxc_config_define(net_name);
@@ -220,6 +221,7 @@ static struct lxc_config_t config_jump_table[] = {
 	{ "lxc.net.ipv6.address",          set_config_net_ipv6_address,            get_config_net_ipv6_address,            clr_config_net_ipv6_address,          },
 	{ "lxc.net.ipv6.gateway",          set_config_net_ipv6_gateway,            get_config_net_ipv6_gateway,            clr_config_net_ipv6_gateway,          },
 	{ "lxc.net.link",                  set_config_net_link,                    get_config_net_link,                    clr_config_net_link,                  },
+	{ "lxc.net.l2proxy",               set_config_net_l2proxy,                 get_config_net_l2proxy,                 clr_config_net_l2proxy,               },
 	{ "lxc.net.macvlan.mode",          set_config_net_macvlan_mode,            get_config_net_macvlan_mode,            clr_config_net_macvlan_mode,          },
 	{ "lxc.net.mtu",                   set_config_net_mtu,                     get_config_net_mtu,                     clr_config_net_mtu,                   },
 	{ "lxc.net.name",                  set_config_net_name,                    get_config_net_name,                    clr_config_net_name,                  },
@@ -396,6 +398,33 @@ static int set_config_net_link(const char *key, const char *value,
 	return ret;
 }
 
+static int set_config_net_l2proxy(const char *key, const char *value,
+				     struct lxc_conf *lxc_conf, void *data)
+{
+	struct lxc_netdev *netdev = data;
+	unsigned int val = 0;
+
+	if (lxc_config_value_empty(value))
+		return clr_config_net_l2proxy(key, lxc_conf, data);
+
+	if (!netdev)
+		return -1;
+
+	if (lxc_safe_uint(value, &val) < 0)
+		return minus_one_set_errno(EINVAL);
+
+	switch (val) {
+	case 0:
+		netdev->l2proxy = false;
+		return 0;
+	case 1:
+		netdev->l2proxy = true;
+		return 0;
+	}
+
+	return minus_one_set_errno(EINVAL);
+}
+
 static int set_config_net_name(const char *key, const char *value,
 			       struct lxc_conf *lxc_conf, void *data)
 {
@@ -4915,6 +4944,19 @@ static int clr_config_net_link(const char *key, struct lxc_conf *lxc_conf,
 	return 0;
 }
 
+static int clr_config_net_l2proxy(const char *key, struct lxc_conf *lxc_conf,
+			       void *data)
+{
+	struct lxc_netdev *netdev = data;
+
+	if (!netdev)
+		return -1;
+
+	netdev->l2proxy = false;
+
+	return 0;
+}
+
 static int clr_config_net_macvlan_mode(const char *key,
 				       struct lxc_conf *lxc_conf, void *data)
 {
@@ -5205,6 +5247,13 @@ static int get_config_net_link(const char *key, char *retv, int inlen,
 	return fulllen;
 }
 
+static int get_config_net_l2proxy(const char *key, char *retv, int inlen,
+			       struct lxc_conf *c, void *data)
+{
+	struct lxc_netdev *netdev = data; /* checked for NULL below, as in set/clr */
+	return netdev ? lxc_get_conf_bool(c, retv, inlen, netdev->l2proxy) : -1;
+}
+
 static int get_config_net_name(const char *key, char *retv, int inlen,
 			       struct lxc_conf *c, void *data)
 {
diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c
index 67bf0824a2..870c6b7e58 100644
--- a/src/lxc/confile_utils.c
+++ b/src/lxc/confile_utils.c
@@ -328,6 +328,10 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
 			if (netdev->link[0] != '\0')
 				TRACE("link: %s", netdev->link);
 
+			/* l2proxy only used when link is specified */
+			if (netdev->link[0] != '\0')
+				TRACE("l2proxy: %s", netdev->l2proxy ? "true" : "false");
+
 			if (netdev->name[0] != '\0')
 				TRACE("name: %s", netdev->name);
 
diff --git a/src/lxc/file_utils.c b/src/lxc/file_utils.c
index 603c0ace66..fa8f934093 100644
--- a/src/lxc/file_utils.c
+++ b/src/lxc/file_utils.c
@@ -147,7 +147,7 @@ ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, const void *expe
 	ssize_t ret;
 
 	ret = lxc_read_nointr(fd, buf, count);
-	if (ret <= 0)
+	if (ret < 0)
 		return ret;
 
 	if ((size_t)ret != count)
@@ -158,6 +158,21 @@ ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, const void *expe
 		return -1;
 	}
 
+	return 0;
+}
+
+ssize_t lxc_read_file_expect(const char *path, void *buf, size_t count, const void *expected_buf)
+{
+	int fd;
+	ssize_t ret;
+
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return -1;
+
+	ret = lxc_read_nointr_expect(fd, buf, count, expected_buf);
+	close(fd);
+
 	return ret;
 }
 
diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h
index cc8f69e183..1b8033d69b 100644
--- a/src/lxc/file_utils.h
+++ b/src/lxc/file_utils.h
@@ -40,6 +40,8 @@ extern ssize_t lxc_send_nointr(int sockfd, void *buf, size_t len, int flags);
 extern ssize_t lxc_read_nointr(int fd, void *buf, size_t count);
 extern ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count,
 				      const void *expected_buf);
+extern ssize_t lxc_read_file_expect(const char *path, void *buf, size_t count,
+				      const void *expected_buf);
 extern ssize_t lxc_recv_nointr(int sockfd, void *buf, size_t len, int flags);
 
 extern bool file_exists(const char *f);
diff --git a/src/lxc/network.c b/src/lxc/network.c
index ec7dbccccf..4b8431691a 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -1497,6 +1497,25 @@ static int proc_sys_net_write(const char *path, const char *value)
 	return err;
 }
 
+static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
+{
+	int ret;
+	char path[PATH_MAX];
+	char buf[1] = "";
+
+	if (family != AF_INET && family != AF_INET6)
+		return minus_one_set_errno(EINVAL);
+
+	ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
+		       family == AF_INET ? "ipv4" : "ipv6", ifname,
+		       "forwarding");
+
+	if (ret < 0 || (size_t)ret >= PATH_MAX)
+		return minus_one_set_errno(E2BIG);
+
+	return lxc_read_file_expect(path, buf, 1, "1");
+}
+
 static int neigh_proxy_set(const char *ifname, int family, int flag)
 {
 	int ret;
@@ -1514,6 +1533,25 @@ static int neigh_proxy_set(const char *ifname, int family, int flag)
 	return proc_sys_net_write(path, flag ? "1" : "0");
 }
 
+static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
+{
+	int ret;
+	char path[PATH_MAX];
+	char buf[1] = "";
+
+	if (family != AF_INET && family != AF_INET6)
+		return minus_one_set_errno(EINVAL);
+
+	ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
+		       family == AF_INET ? "ipv4" : "ipv6", ifname,
+		       family == AF_INET ? "proxy_arp" : "proxy_ndp");
+
+	if (ret < 0 || (size_t)ret >= PATH_MAX)
+		return minus_one_set_errno(E2BIG);
+
+	return lxc_read_file_expect(path, buf, 1, "1");
+}
+
 int lxc_neigh_proxy_on(const char *name, int family)
 {
 	return neigh_proxy_set(name, family, 1);
@@ -2515,6 +2553,151 @@ bool lxc_delete_network_unpriv(struct lxc_handler *handler)
 	return true;
 }
 
+struct ip_proxy_args {
+	const char *ip;
+	const char *dev;
+};
+
+static int lxc_add_ip_proxy_exec_wrapper(void *data)
+{
+	struct ip_proxy_args *args = data;
+
+	execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev,
+	       (char *)NULL);
+	return -1;
+}
+
+static int lxc_del_ip_proxy_exec_wrapper(void *data)
+{
+	struct ip_proxy_args *args = data;
+
+	execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev,
+	       (char *)NULL);
+	return -1;
+}
+
+static int lxc_add_ip_proxy(const char *ip, const char *dev)
+{
+	int ret;
+	char cmd_output[PATH_MAX];
+	struct ip_proxy_args args;
+	args.ip = ip;
+	args.dev = dev;
+
+	ret = run_command(cmd_output, sizeof(cmd_output),
+			  lxc_add_ip_proxy_exec_wrapper, (void *)&args);
+	if (ret < 0) {
+		ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int lxc_del_ip_proxy(const char *ip, const char *dev)
+{
+	int ret;
+	char cmd_output[PATH_MAX];
+	struct ip_proxy_args args;
+	args.ip = ip;
+	args.dev = dev;
+
+	ret = run_command(cmd_output, sizeof(cmd_output),
+			  lxc_del_ip_proxy_exec_wrapper, (void *)&args);
+	if (ret < 0) {
+		ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
+	struct lxc_list *cur, *next;
+	struct lxc_inetdev *inet4dev;
+	struct lxc_inet6dev *inet6dev;
+	char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
+
+	/* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
+	if (!lxc_list_empty(&netdev->ipv6)) {
+		/* Check for net.ipv6.conf.[link].proxy_ndp=1 */
+		if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
+			ERROR("l2proxy requires sysctl net.ipv6.conf.%s.proxy_ndp be set to 1", netdev->link);
+			return minus_one_set_errno(EINVAL);
+		}
+
+		/* Check for net.ipv6.conf.[link].forwarding=1 */
+		if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
+			ERROR("l2proxy requires sysctl net.ipv6.conf.%s.forwarding be set to 1", netdev->link);
+			return minus_one_set_errno(EINVAL);
+		}
+	}
+
+	lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
+		inet4dev = cur->elem;
+		if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4))) {
+			return minus_one_set_errno(EINVAL);
+		}
+
+		if (lxc_add_ip_proxy(bufinet4, netdev->link)) {
+			return minus_one_set_errno(EINVAL);
+		}
+	}
+
+	lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
+		inet6dev = cur->elem;
+		if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6))) {
+			return minus_one_set_errno(EINVAL);
+		}
+
+		if (lxc_add_ip_proxy(bufinet6, netdev->link)) {
+			return minus_one_set_errno(EINVAL);
+		}
+	}
+
+	return 0;
+}
+
+static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
+	struct lxc_list *cur, *next;
+	struct lxc_inetdev *inet4dev;
+	struct lxc_inet6dev *inet6dev;
+	char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
+	int err = 0;
+
+	lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
+		inet4dev = cur->elem;
+		if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4,sizeof(bufinet4))) {
+			err = -1;
+			continue; /* Try to remove any other l2proxy entries */
+		}
+
+		if (lxc_del_ip_proxy(bufinet4, netdev->link)) {
+			err = -1;
+			continue; /* Try to remove any other l2proxy entries */
+		}
+	}
+
+	lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
+		inet6dev = cur->elem;
+		if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6))) {
+			err = -1;
+			continue; /* Try to remove any other l2proxy entries */
+		}
+
+		if (lxc_del_ip_proxy(bufinet6, netdev->link)) {
+			err = -1;
+			continue; /* Try to remove any other l2proxy entries */
+		}
+	}
+
+	if (err < 0) {
+		return minus_one_set_errno(EINVAL);
+	}
+
+	return 0;
+}
+
 int lxc_create_network_priv(struct lxc_handler *handler)
 {
 	struct lxc_list *iterator;
@@ -2531,11 +2714,18 @@ int lxc_create_network_priv(struct lxc_handler *handler)
 			return -1;
 		}
 
+		/* Setup l2proxy entries if enabled and used with a link property */
+		if (netdev->l2proxy && netdev->link[0] != '\0') {
+			if (lxc_setup_l2proxy(netdev)) {
+				ERROR("Failed to setup l2proxy");
+				return -1;
+			}
+		}
+
 		if (netdev_conf[netdev->type](handler, netdev)) {
 			ERROR("Failed to create network device");
 			return -1;
 		}
-
 	}
 
 	return 0;
@@ -2631,6 +2821,14 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
 		if (!netdev->ifindex)
 			continue;
 
+		/* Delete l2proxy entries if enabled and used with a link property */
+		if (netdev->l2proxy && netdev->link[0] != '\0') {
+			if (lxc_delete_l2proxy(netdev)) {
+				WARN("Failed to delete l2proxy");
+				/* Don't return, let the network be cleaned up as normal. */
+			}
+		}
+
 		if (netdev->type == LXC_NET_PHYS) {
 			ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
 			if (ret < 0)
diff --git a/src/lxc/network.h b/src/lxc/network.h
index e2757c1dba..a7ae82fc7b 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -164,6 +164,7 @@ struct lxc_netdev {
 	int type;
 	int flags;
 	char link[IFNAMSIZ];
+	bool l2proxy;
 	char name[IFNAMSIZ];
 	char *hwaddr;
 	char *mtu;

From b275a9ad5fffd63f317722da77ac7125ef4e5033 Mon Sep 17 00:00:00 2001
From: tomponline <thomas.parrott at canonical.com>
Date: Fri, 26 Apr 2019 11:26:45 +0100
Subject: [PATCH 2/3] network: Adds IPVLAN support

Example usage:

	lxc.net.[i].type=ipvlan
	lxc.net.[i].ipvlan.mode=[l3|l3s|l2] (defaults to l3)
	lxc.net.[i].ipvlan.isolation=[bridge|private|vepa] (defaults to bridge)
	lxc.net.[i].link=eth0
	lxc.net.[i].flags=up

Signed-off-by: tomponline <thomas.parrott at canonical.com>
---
 doc/api-extensions.md          |  14 +++
 doc/lxc.container.conf.sgml.in |  49 +++++++--
 src/lxc/api_extensions.h       |   1 +
 src/lxc/confile.c              | 175 +++++++++++++++++++++++++++++++--
 src/lxc/confile_utils.c        |  79 +++++++++++++++
 src/lxc/confile_utils.h        |   4 +
 src/lxc/macro.h                |  32 ++++++
 src/lxc/network.c              | 164 ++++++++++++++++++++++++++++++
 src/lxc/network.h              |   7 ++
 src/tests/parse_config_file.c  |  35 +++++++
 10 files changed, 545 insertions(+), 15 deletions(-)

diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index c301aadd76..91ffd0a2d6 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -64,3 +64,17 @@ Additionally, for IPv6 addresses it will check the following sysctl values and f
 net.ipv6.conf.[link].proxy_ndp=1
 net.ipv6.conf.[link].forwarding=1
 ```
+
+## network\_ipvlan
+
+This introduces the `ipvlan` network type.
+
+Example usage:
+
+```
+lxc.net.[i].type=ipvlan
+lxc.net.[i].ipvlan.mode=[l3|l3s|l2] (defaults to l3)
+lxc.net.[i].ipvlan.isolation=[bridge|private|vepa] (defaults to bridge)
+lxc.net.[i].link=eth0
+lxc.net.[i].flags=up
+```
diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
index 77157ca78e..2589028c22 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -485,7 +485,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
               different macvlan on the same upper device. The accepted
               modes are <option>private</option>, <option>vepa</option>,
               <option>bridge</option> and <option>passthru</option>.
-	      In <option>private</option> mode, the device never
+              In <option>private</option> mode, the device never
               communicates with any other device on the same upper_dev (default).
               In <option>vepa</option> mode, the new Virtual Ethernet Port
               Aggregator (VEPA) mode, it assumes that the adjacent
@@ -510,6 +510,41 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
               mode is possible for one physical interface.
             </para>
 
+            <para>
+              <option>ipvlan:</option> an ipvlan interface is linked
+              with the interface specified by
+              the <option>lxc.net.[i].link</option> and assigned to
+              the container.
+              <option>lxc.net.[i].ipvlan.mode</option> specifies the
+              mode the ipvlan will use to communicate between
+              different ipvlan on the same upper device. The accepted
+              modes are <option>l3</option>, <option>l3s</option> and
+              <option>l2</option>. It defaults to <option>l3</option> mode.
+              In <option>l3</option> mode TX processing up to L3 happens on the stack instance
+              attached to the slave device, and packets are switched to the stack instance of the
+              master device for the L2 processing; routing from that instance will be
+              used before packets are queued on the outbound device. In this mode the slaves
+              can neither receive nor send multicast / broadcast traffic.
+              In <option>l3s</option> mode TX processing is very similar to the L3 mode except that
+              iptables (conn-tracking) works in this mode and hence it is L3-symmetric (L3s).
+              This will have slightly less performance but that shouldn't matter since you are
+              choosing this mode over plain-L3 mode to make conn-tracking work.
+              In <option>l2</option> mode TX processing happens on the stack instance attached to
+              the slave device and packets are switched and queued to the master device to send
+              out. In this mode the slaves will RX/TX multicast and broadcast (if applicable) as well.
+              <option>lxc.net.[i].ipvlan.isolation</option> specifies the isolation mode.
+              The accepted isolation values are <option>bridge</option>,
+              <option>private</option> and <option>vepa</option>.
+              It defaults to <option>bridge</option>.
+              In <option>bridge</option> isolation mode slaves can cross-talk among themselves
+              apart from talking through the master device.
+              In <option>private</option> isolation mode the port is set in private mode,
+              i.e. the port won't allow cross communication between slaves.
+              In <option>vepa</option> isolation mode the port is set in VEPA mode,
+              i.e. the port will offload switching functionality to an external entity as
+              described in 802.1Qbg.
+            </para>
+
             <para>
               <option>phys:</option> an already existing interface
               specified by the <option>lxc.net.[i].link</option> is
@@ -626,8 +661,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
               interface (as specified by the
               <option>lxc.net.[i].link</option> option) and use that as
               the gateway. <option>auto</option> is only available when
-              using the <option>veth</option> and
-              <option>macvlan</option> network types.
+              using the <option>veth</option>,
+              <option>macvlan</option> and <option>ipvlan</option> network types.
             </para>
           </listitem>
         </varlistentry>
@@ -660,8 +695,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
               interface (as specified by the
               <option>lxc.net.[i].link</option> option) and use that as
               the gateway. <option>auto</option> is only available when
-              using the <option>veth</option> and
-              <option>macvlan</option> network types.
+              using the <option>veth</option>,
+              <option>macvlan</option> and <option>ipvlan</option> network types.
             </para>
           </listitem>
         </varlistentry>
@@ -696,7 +731,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                 <listitem>
                  <para>
                  LXC_NET_TYPE: the network type. This is one of the valid
-                 network types listed here (e.g. 'macvlan', 'veth').
+                 network types listed here (e.g. 'vlan', 'macvlan', 'ipvlan', 'veth').
                   </para>
                 </listitem>
 
@@ -762,7 +797,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                 <listitem>
                  <para>
                  LXC_NET_TYPE: the network type. This is one of the valid
-                 network types listed here (e.g. 'macvlan', 'veth').
+                 network types listed here (e.g. 'vlan', 'macvlan', 'ipvlan', 'veth').
                   </para>
                 </listitem>
 
diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h
index ce34cd5af1..40a0d199c8 100644
--- a/src/lxc/api_extensions.h
+++ b/src/lxc/api_extensions.h
@@ -46,6 +46,7 @@ static char *api_extensions[] = {
 	"seccomp_notify",
 	"network_veth_routes",
 	"network_l2proxy",
+	"network_ipvlan",
 };
 
 static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 398497bd7f..b245213e8d 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -131,6 +131,8 @@ lxc_config_define(net_ipv6_gateway);
 lxc_config_define(net_link);
 lxc_config_define(net_l2proxy);
 lxc_config_define(net_macvlan_mode);
+lxc_config_define(net_ipvlan_mode);
+lxc_config_define(net_ipvlan_isolation);
 lxc_config_define(net_mtu);
 lxc_config_define(net_name);
 lxc_config_define(net_nic);
@@ -223,6 +225,8 @@ static struct lxc_config_t config_jump_table[] = {
 	{ "lxc.net.link",                  set_config_net_link,                    get_config_net_link,                    clr_config_net_link,                  },
 	{ "lxc.net.l2proxy",               set_config_net_l2proxy,                 get_config_net_l2proxy,                 clr_config_net_l2proxy,               },
 	{ "lxc.net.macvlan.mode",          set_config_net_macvlan_mode,            get_config_net_macvlan_mode,            clr_config_net_macvlan_mode,          },
+	{ "lxc.net.ipvlan.mode",           set_config_net_ipvlan_mode,             get_config_net_ipvlan_mode,             clr_config_net_ipvlan_mode,           },
+	{ "lxc.net.ipvlan.isolation",      set_config_net_ipvlan_isolation,        get_config_net_ipvlan_isolation,        clr_config_net_ipvlan_isolation,      },
 	{ "lxc.net.mtu",                   set_config_net_mtu,                     get_config_net_mtu,                     clr_config_net_mtu,                   },
 	{ "lxc.net.name",                  set_config_net_name,                    get_config_net_name,                    clr_config_net_name,                  },
 	{ "lxc.net.script.down",           set_config_net_script_down,             get_config_net_script_down,             clr_config_net_script_down,           },
@@ -293,21 +297,24 @@ static int set_config_net_type(const char *key, const char *value,
 	if (!netdev)
 		return -1;
 
-	if (!strcmp(value, "veth")) {
+	if (strcmp(value, "veth") == 0) {
 		netdev->type = LXC_NET_VETH;
 		lxc_list_init(&netdev->priv.veth_attr.ipv4_routes);
 		lxc_list_init(&netdev->priv.veth_attr.ipv6_routes);
-	} else if (!strcmp(value, "macvlan")) {
+	} else if (strcmp(value, "macvlan") == 0) {
 		netdev->type = LXC_NET_MACVLAN;
-		lxc_macvlan_mode_to_flag(&netdev->priv.macvlan_attr.mode,
-					 "private");
-	} else if (!strcmp(value, "vlan")) {
+		lxc_macvlan_mode_to_flag(&netdev->priv.macvlan_attr.mode, "private");
+	} else if (strcmp(value, "ipvlan") == 0) {
+		netdev->type = LXC_NET_IPVLAN;
+		lxc_ipvlan_mode_to_flag(&netdev->priv.ipvlan_attr.mode, "l3");
+		lxc_ipvlan_isolation_to_flag(&netdev->priv.ipvlan_attr.isolation, "bridge");
+	} else if (strcmp(value, "vlan") == 0) {
 		netdev->type = LXC_NET_VLAN;
-	} else if (!strcmp(value, "phys")) {
+	} else if (strcmp(value, "phys") == 0) {
 		netdev->type = LXC_NET_PHYS;
-	} else if (!strcmp(value, "empty")) {
+	} else if (strcmp(value, "empty") == 0) {
 		netdev->type = LXC_NET_EMPTY;
-	} else if (!strcmp(value, "none")) {
+	} else if (strcmp(value, "none") == 0) {
 		netdev->type = LXC_NET_NONE;
 	} else {
 		ERROR("Invalid network type %s", value);
@@ -467,6 +474,44 @@ static int set_config_net_macvlan_mode(const char *key, const char *value,
 	return lxc_macvlan_mode_to_flag(&netdev->priv.macvlan_attr.mode, value);
 }
 
+static int set_config_net_ipvlan_mode(const char *key, const char *value,
+				       struct lxc_conf *lxc_conf, void *data)
+{
+	struct lxc_netdev *netdev = data;
+
+	if (lxc_config_value_empty(value))
+		return clr_config_net_ipvlan_mode(key, lxc_conf, data);
+
+	if (!netdev)
+		return minus_one_set_errno(EINVAL);
+	/* A type mismatch is a config error, not a failed syscall: no errno text. */
+	if (netdev->type != LXC_NET_IPVLAN) {
+		ERROR("Invalid ipvlan mode \"%s\", can only be used with ipvlan network", value);
+		return minus_one_set_errno(EINVAL);
+	}
+	/* Map "l3", "l3s" or "l2" onto the matching ipvlan mode flag. */
+	return lxc_ipvlan_mode_to_flag(&netdev->priv.ipvlan_attr.mode, value);
+}
+
+static int set_config_net_ipvlan_isolation(const char *key, const char *value,
+				       struct lxc_conf *lxc_conf, void *data)
+{
+	struct lxc_netdev *netdev = data;
+
+	if (lxc_config_value_empty(value))
+		return clr_config_net_ipvlan_isolation(key, lxc_conf, data);
+
+	if (!netdev)
+		return minus_one_set_errno(EINVAL);
+	/* A type mismatch is a config error, not a failed syscall: no errno text. */
+	if (netdev->type != LXC_NET_IPVLAN) {
+		ERROR("Invalid ipvlan isolation \"%s\", can only be used with ipvlan network", value);
+		return minus_one_set_errno(EINVAL);
+	}
+	/* Map "bridge", "private" or "vepa" onto the matching isolation flag. */
+	return lxc_ipvlan_isolation_to_flag(&netdev->priv.ipvlan_attr.isolation, value);
+}
+
 static int set_config_net_hwaddr(const char *key, const char *value,
 				 struct lxc_conf *lxc_conf, void *data)
 {
@@ -4973,6 +5018,38 @@ static int clr_config_net_macvlan_mode(const char *key,
 	return 0;
 }
 
+static int clr_config_net_ipvlan_mode(const char *key,
+				       struct lxc_conf *lxc_conf, void *data)
+{
+	struct lxc_netdev *netdev = data;
+
+	if (!netdev)
+		return minus_one_set_errno(EINVAL);
+
+	if (netdev->type != LXC_NET_IPVLAN)
+		return 0;
+
+	netdev->priv.ipvlan_attr.mode = -1;
+
+	return 0;
+}
+
+static int clr_config_net_ipvlan_isolation(const char *key,
+				       struct lxc_conf *lxc_conf, void *data)
+{
+	struct lxc_netdev *netdev = data;
+
+	if (!netdev)
+		return minus_one_set_errno(EINVAL);
+
+	if (netdev->type != LXC_NET_IPVLAN)
+		return 0;
+
+	netdev->priv.ipvlan_attr.isolation = -1;
+
+	return 0;
+}
+
 static int clr_config_net_veth_pair(const char *key, struct lxc_conf *lxc_conf,
 				    void *data)
 {
@@ -5317,6 +5394,84 @@ static int get_config_net_macvlan_mode(const char *key, char *retv, int inlen,
 	return fulllen;
 }
 
+static int get_config_net_ipvlan_mode(const char *key, char *retv, int inlen,
+				       struct lxc_conf *c, void *data)
+{
+	int len;
+	int fulllen = 0;
+	const char *mode;
+	struct lxc_netdev *netdev = data;
+
+	if (!retv)
+		inlen = 0;
+	else
+		memset(retv, 0, inlen);
+
+	if (!netdev)
+		return minus_one_set_errno(EINVAL);
+
+	if (netdev->type != LXC_NET_IPVLAN)
+		return 0;
+
+	switch (netdev->priv.ipvlan_attr.mode) {
+	case IPVLAN_MODE_L3:
+		mode = "l3";
+		break;
+	case IPVLAN_MODE_L3S:
+		mode = "l3s";
+		break;
+	case IPVLAN_MODE_L2:
+		mode = "l2";
+		break;
+	default:
+		mode = "(invalid)";
+		break;
+	}
+
+	strprint(retv, inlen, "%s", mode);
+
+	return fulllen;
+}
+
+static int get_config_net_ipvlan_isolation(const char *key, char *retv, int inlen,
+				       struct lxc_conf *c, void *data)
+{
+	int len;
+	int fulllen = 0;
+	const char *mode;
+	struct lxc_netdev *netdev = data;
+
+	if (!retv)
+		inlen = 0;
+	else
+		memset(retv, 0, inlen);
+
+	if (!netdev)
+		return minus_one_set_errno(EINVAL);
+
+	if (netdev->type != LXC_NET_IPVLAN)
+		return 0;
+
+	switch (netdev->priv.ipvlan_attr.isolation) {
+	case IPVLAN_ISOLATION_BRIDGE:
+		mode = "bridge";
+		break;
+	case IPVLAN_ISOLATION_PRIVATE:
+		mode = "private";
+		break;
+	case IPVLAN_ISOLATION_VEPA:
+		mode = "vepa";
+		break;
+	default:
+		mode = "(invalid)";
+		break;
+	}
+
+	strprint(retv, inlen, "%s", mode);
+
+	return fulllen;
+}
+
 static int get_config_net_veth_pair(const char *key, char *retv, int inlen,
 				    struct lxc_conf *c, void *data)
 {
@@ -5767,6 +5922,10 @@ int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, int inlen)
 	case LXC_NET_MACVLAN:
 		strprint(retv, inlen, "macvlan.mode\n");
 		break;
+	case LXC_NET_IPVLAN:
+		strprint(retv, inlen, "ipvlan.mode\n");
+		strprint(retv, inlen, "ipvlan.isolation\n");
+		break;
 	case LXC_NET_VLAN:
 		strprint(retv, inlen, "vlan.id\n");
 		break;
diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c
index 870c6b7e58..12a8dbb095 100644
--- a/src/lxc/confile_utils.c
+++ b/src/lxc/confile_utils.c
@@ -299,6 +299,17 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
 				      mode ? mode : "(invalid mode)");
 			}
 			break;
+		case LXC_NET_IPVLAN:
+			TRACE("type: ipvlan");
+
+			char *mode;
+			mode = lxc_ipvlan_flag_to_mode(netdev->priv.ipvlan_attr.mode);
+			TRACE("ipvlan mode: %s", mode ? mode : "(invalid mode)");
+
+			char *isolation;
+			isolation = lxc_ipvlan_flag_to_isolation(netdev->priv.ipvlan_attr.isolation);
+			TRACE("ipvlan isolation: %s", isolation ? isolation : "(invalid isolation)");
+			break;
 		case LXC_NET_VLAN:
 			TRACE("type: vlan");
 			TRACE("vlan id: %d", netdev->priv.vlan_attr.vid);
@@ -523,6 +534,74 @@ char *lxc_macvlan_flag_to_mode(int mode)
 	return NULL;
 }
 
+static struct lxc_ipvlan_mode { /* configuration name <-> IPVLAN_MODE_* lookup table */
+	char *name; /* string accepted/returned by the conversion helpers */
+	int mode;   /* matching IPVLAN_MODE_* value */
+} ipvlan_mode[] = {
+	{ .name = "l3",  .mode = IPVLAN_MODE_L3  },
+	{ .name = "l3s", .mode = IPVLAN_MODE_L3S },
+	{ .name = "l2",  .mode = IPVLAN_MODE_L2  },
+};
+
+/* Look up @value in ipvlan_mode; on a match store its value in @mode and return 0, else -1. */
+int lxc_ipvlan_mode_to_flag(int *mode, const char *value)
+{
+	for (size_t idx = 0; idx < sizeof(ipvlan_mode) / sizeof(ipvlan_mode[0]); idx++) {
+		if (strcmp(ipvlan_mode[idx].name, value) == 0) {
+			*mode = ipvlan_mode[idx].mode;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/* Return the ipvlan_mode table name whose value equals @mode, or NULL when none does. */
+char *lxc_ipvlan_flag_to_mode(int mode)
+{
+	for (size_t idx = 0; idx < sizeof(ipvlan_mode) / sizeof(ipvlan_mode[0]); idx++) {
+		if (ipvlan_mode[idx].mode == mode)
+			return ipvlan_mode[idx].name;
+	}
+
+	/* No table entry carries this value. */
+	return NULL;
+}
+
+static struct lxc_ipvlan_isolation { /* configuration name <-> IPVLAN_ISOLATION_* lookup table */
+	char *name; /* string accepted/returned by the conversion helpers */
+	int flag;   /* matching IPVLAN_ISOLATION_* value */
+} ipvlan_isolation[] = {
+	{ .name = "bridge",  .flag = IPVLAN_ISOLATION_BRIDGE  },
+	{ .name = "private", .flag = IPVLAN_ISOLATION_PRIVATE },
+	{ .name = "vepa",    .flag = IPVLAN_ISOLATION_VEPA    },
+};
+
+/* Look up @value in ipvlan_isolation; on a match store its value in @flag and return 0, else -1. */
+int lxc_ipvlan_isolation_to_flag(int *flag, const char *value)
+{
+	for (size_t idx = 0; idx < sizeof(ipvlan_isolation) / sizeof(ipvlan_isolation[0]); idx++) {
+		if (strcmp(ipvlan_isolation[idx].name, value) == 0) {
+			*flag = ipvlan_isolation[idx].flag;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/* Return the ipvlan_isolation table name whose value equals @flag, or NULL when none does. */
+char *lxc_ipvlan_flag_to_isolation(int flag)
+{
+	for (size_t idx = 0; idx < sizeof(ipvlan_isolation) / sizeof(ipvlan_isolation[0]); idx++) {
+		if (ipvlan_isolation[idx].flag == flag)
+			return ipvlan_isolation[idx].name;
+	}
+
+	/* No table entry carries this value. */
+	return NULL;
+}
+
 int set_config_string_item(char **conf_item, const char *value)
 {
 	char *new_value;
diff --git a/src/lxc/confile_utils.h b/src/lxc/confile_utils.h
index 5a3bcc914c..cfed91dc09 100644
--- a/src/lxc/confile_utils.h
+++ b/src/lxc/confile_utils.h
@@ -58,6 +58,10 @@ extern bool lxc_remove_nic_by_idx(struct lxc_conf *conf, unsigned int idx);
 extern void lxc_free_networks(struct lxc_list *networks);
 extern int lxc_macvlan_mode_to_flag(int *mode, const char *value);
 extern char *lxc_macvlan_flag_to_mode(int mode);
+extern int lxc_ipvlan_mode_to_flag(int *mode, const char *value); /* name -> IPVLAN_MODE_*; 0 or -1 */
+extern char *lxc_ipvlan_flag_to_mode(int mode); /* IPVLAN_MODE_* -> name, or NULL */
+extern int lxc_ipvlan_isolation_to_flag(int *flag, const char *value); /* name -> IPVLAN_ISOLATION_*; 0 or -1 */
+extern char *lxc_ipvlan_flag_to_isolation(int flag); /* IPVLAN_ISOLATION_* -> name, or NULL */
 
 extern int set_config_string_item(char **conf_item, const char *value);
 extern int set_config_string_item_max(char **conf_item, const char *value,
diff --git a/src/lxc/macro.h b/src/lxc/macro.h
index 7df3b56f03..7626c5d76b 100644
--- a/src/lxc/macro.h
+++ b/src/lxc/macro.h
@@ -280,6 +280,14 @@ extern int __build_bug_on_failed;
 #define IFLA_MACVLAN_MODE 1
 #endif
 
+#ifndef IFLA_IPVLAN_MODE
+#define IFLA_IPVLAN_MODE 1 /* fallback if not already defined; attribute used to send the ipvlan mode */
+#endif
+
+#ifndef IFLA_IPVLAN_ISOLATION
+#define IFLA_IPVLAN_ISOLATION 2 /* NOTE(review): attribute 2 appears to be IFLA_IPVLAN_FLAGS in linux/if_link.h - confirm */
+#endif
+
 #ifndef IFLA_NEW_NETNSID
 #define IFLA_NEW_NETNSID 45
 #endif
@@ -333,6 +341,30 @@ extern int __build_bug_on_failed;
 #define MACVLAN_MODE_PASSTHRU 8
 #endif
 
+#ifndef IPVLAN_MODE_L2
+#define IPVLAN_MODE_L2 0 /* configuration value "l2" */
+#endif
+
+#ifndef IPVLAN_MODE_L3
+#define IPVLAN_MODE_L3 1 /* configuration value "l3" */
+#endif
+
+#ifndef IPVLAN_MODE_L3S
+#define IPVLAN_MODE_L3S 2 /* configuration value "l3s" */
+#endif
+
+#ifndef IPVLAN_ISOLATION_BRIDGE
+#define IPVLAN_ISOLATION_BRIDGE 0 /* "bridge"; lxc_ipvlan_create() only sends isolation values > 0 */
+#endif
+
+#ifndef IPVLAN_ISOLATION_PRIVATE
+#define IPVLAN_ISOLATION_PRIVATE 1 /* configuration value "private" */
+#endif
+
+#ifndef IPVLAN_ISOLATION_VEPA
+#define IPVLAN_ISOLATION_VEPA 2 /* configuration value "vepa" */
+#endif
+
 /* Attributes of RTM_NEWNSID/RTM_GETNSID messages */
 enum {
 	__LXC_NETNSA_NONE,
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 4b8431691a..d8d826b6f7 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -376,6 +376,147 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
 	return -1;
 }
 
+/* Create ipvlan link @name on @master via RTM_NEWLINK using IPVLAN_MODE_* @mode
+ * and IPVLAN_ISOLATION_* @isolation; returns 0 or minus_one_set_errno(...).
+ */
+static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
+{
+	int err, index, len;
+	struct ifinfomsg *ifi;
+	struct nl_handler nlh;
+	struct rtattr *nest, *nest2;
+	struct nlmsg *answer = NULL, *nlmsg = NULL;
+
+	len = strlen(master);
+	if (len == 1 || len >= IFNAMSIZ)
+		return minus_one_set_errno(EINVAL);
+
+	len = strlen(name);
+	if (len == 1 || len >= IFNAMSIZ)
+		return minus_one_set_errno(EINVAL);
+
+	index = if_nametoindex(master);
+	if (!index)
+		return minus_one_set_errno(EINVAL);
+
+	err = netlink_open(&nlh, NETLINK_ROUTE);
+	if (err)
+		return minus_one_set_errno(-err);
+
+	err = -ENOMEM; /* reported if an allocation or reservation below fails */
+	nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
+	if (!nlmsg)
+		goto out;
+
+	answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
+	if (!answer)
+		goto out;
+
+	nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
+	nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
+
+	ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+	if (!ifi)
+		goto out;
+	ifi->ifi_family = AF_UNSPEC;
+
+	err = -EPROTO; /* reported if adding any attribute below fails */
+	nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
+	if (!nest)
+		goto out;
+
+	if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
+		goto out;
+
+	/* Sent unconditionally and as u16 like the kernel's ipvlan attribute: since
+	 * IPVLAN_MODE_L2 is 0, gating on mode would drop l2 and its isolation flag.
+	 */
+	nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
+	if (!nest2)
+		goto out;
+
+	if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
+		goto out;
+
+	/* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
+	 * and only send isolation if it is >0 as bridge is the default anyway according to ipvlan docs.
+	 */
+	if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
+		goto out;
+
+	nla_end_nested(nlmsg, nest2);
+	nla_end_nested(nlmsg, nest);
+
+	if (nla_put_u32(nlmsg, IFLA_LINK, index))
+		goto out;
+
+	if (nla_put_string(nlmsg, IFLA_IFNAME, name))
+		goto out;
+
+	err = netlink_transaction(&nlh, nlmsg, answer);
+out:
+	netlink_close(&nlh);
+	nlmsg_free(answer);
+	nlmsg_free(nlmsg);
+	if (err < 0)
+		return minus_one_set_errno(-err);
+	return 0;
+}
+
+/* Create an ipvlan device on netdev->link named via lxc_mkifname("ipXXXXXX"), store
+ * its index in netdev->ifindex and run the optional up script; returns 0 or -1.
+ */
+static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+	char peerbuf[IFNAMSIZ], *peer;
+	int err;
+
+	if (netdev->link[0] == '\0') {
+		ERROR("No link for ipvlan network device specified");
+		return -1;
+	}
+
+	err = snprintf(peerbuf, sizeof(peerbuf), "ipXXXXXX");
+	if (err < 0 || (size_t)err >= sizeof(peerbuf))
+		return -1;
+
+	peer = lxc_mkifname(peerbuf);
+	if (!peer)
+		return -1;
+
+	err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode, netdev->priv.ipvlan_attr.isolation);
+	if (err) {
+		SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"", peer, netdev->link);
+		goto on_error;
+	}
+
+	netdev->ifindex = if_nametoindex(peer);
+	if (!netdev->ifindex) {
+		ERROR("Failed to retrieve ifindex for \"%s\"", peer);
+		goto on_error;
+	}
+
+	if (netdev->upscript) {
+		char *argv[] = { "ipvlan", netdev->link, NULL };
+
+		err = run_script_argv(handler->name,
+				handler->conf->hooks_version, "net",
+				netdev->upscript, "up", argv);
+		if (err < 0)
+			goto on_error;
+	}
+
+	/* Log from ipvlan_attr, the union member that matches this device type. */
+	DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d",
+	      peer, netdev->ifindex, netdev->priv.ipvlan_attr.mode);
+
+	return 0;
+
+on_error:
+	lxc_netdev_delete_by_name(peer);
+	return -1;
+}
+
 static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
 {
 	char peer[IFNAMSIZ];
@@ -518,6 +659,7 @@ static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netd
 static  instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
 	[LXC_NET_VETH]    = instantiate_veth,
 	[LXC_NET_MACVLAN] = instantiate_macvlan,
+	[LXC_NET_IPVLAN]  = instantiate_ipvlan,
 	[LXC_NET_VLAN]    = instantiate_vlan,
 	[LXC_NET_PHYS]    = instantiate_phys,
 	[LXC_NET_EMPTY]   = instantiate_empty,
@@ -571,6 +713,26 @@ static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netd
 	return 0;
 }
 
+/* Run the optional down script for an ipvlan device.
+ * Returns 0 when no down script is set or run_script_argv() does not report
+ * a negative result, -1 otherwise.
+ */
+static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+	char *script_args[] = { "ipvlan", netdev->link, NULL };
+	int rc;
+
+	if (!netdev->downscript)
+		return 0;
+
+	rc = run_script_argv(handler->name, handler->conf->hooks_version,
+			     "net", netdev->downscript, "down", script_args);
+	if (rc < 0)
+		return -1;
+
+	return 0;
+}
+
 static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
 {
 	int ret;
@@ -638,6 +800,7 @@ static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
 static  instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
 	[LXC_NET_VETH]    = shutdown_veth,
 	[LXC_NET_MACVLAN] = shutdown_macvlan,
+	[LXC_NET_IPVLAN]  = shutdown_ipvlan,
 	[LXC_NET_VLAN]    = shutdown_vlan,
 	[LXC_NET_PHYS]    = shutdown_phys,
 	[LXC_NET_EMPTY]   = shutdown_empty,
@@ -2050,6 +2213,7 @@ static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
 	[LXC_NET_EMPTY]   = "empty",
 	[LXC_NET_VETH]    = "veth",
 	[LXC_NET_MACVLAN] = "macvlan",
+	[LXC_NET_IPVLAN]  = "ipvlan",
 	[LXC_NET_PHYS]    = "phys",
 	[LXC_NET_VLAN]    = "vlan",
 	[LXC_NET_NONE]    = "none",
diff --git a/src/lxc/network.h b/src/lxc/network.h
index a7ae82fc7b..468593f5e3 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -40,6 +40,7 @@ enum {
 	LXC_NET_EMPTY,
 	LXC_NET_VETH,
 	LXC_NET_MACVLAN,
+	LXC_NET_IPVLAN,
 	LXC_NET_PHYS,
 	LXC_NET_VLAN,
 	LXC_NET_NONE,
@@ -110,6 +111,11 @@ struct ifla_macvlan {
 	int mode; /* private, vepa, bridge, passthru */
 };
 
+struct ifla_ipvlan { /* ipvlan-specific settings of a network device */
+	int mode; /* IPVLAN_MODE_* value: l3, l3s, l2 */
+	int isolation; /* IPVLAN_ISOLATION_* value: bridge, private, vepa */
+};
+
 /* Contains information about the physical network device as seen from the host.
  * @ifindex : The ifindex of the physical network device in the host's network
  *            namespace.
@@ -120,6 +126,7 @@ struct ifla_phys {
 
 union netdev_p {
 	struct ifla_macvlan macvlan_attr;
+	struct ifla_ipvlan ipvlan_attr;
 	struct ifla_phys phys_attr;
 	struct ifla_veth veth_attr;
 	struct ifla_vlan vlan_attr;
diff --git a/src/tests/parse_config_file.c b/src/tests/parse_config_file.c
index f4b4e9a287..ad17867b43 100644
--- a/src/tests/parse_config_file.c
+++ b/src/tests/parse_config_file.c
@@ -666,6 +666,11 @@ int main(int argc, char *argv[])
 		goto non_test_error;
 	}
 
+	if (set_get_compare_clear_save_load(c, "lxc.net.0.type", "ipvlan", tmpf, true)) {
+		lxc_error("%s\n", "lxc.net.0.type");
+		goto non_test_error;
+	}
+
 	if (set_get_compare_clear_save_load(c, "lxc.net.1000.type", "phys", tmpf, true)) {
 		lxc_error("%s\n", "lxc.net.1000.type");
 		goto non_test_error;
@@ -701,6 +706,36 @@ int main(int argc, char *argv[])
 		goto non_test_error;
 	}
 
+	if (set_get_compare_clear_save_load_network(c, "lxc.net.0.ipvlan.mode", "l3", tmpf, true, "ipvlan")) {
+		lxc_error("%s\n", "lxc.net.0.ipvlan.mode");
+		goto non_test_error;
+	}
+
+	if (set_get_compare_clear_save_load_network(c, "lxc.net.0.ipvlan.mode", "l3s", tmpf, true, "ipvlan")) {
+		lxc_error("%s\n", "lxc.net.0.ipvlan.mode");
+		goto non_test_error;
+	}
+
+	if (set_get_compare_clear_save_load_network(c, "lxc.net.0.ipvlan.mode", "l2", tmpf, true, "ipvlan")) {
+		lxc_error("%s\n", "lxc.net.0.ipvlan.mode");
+		goto non_test_error;
+	}
+
+	if (set_get_compare_clear_save_load_network(c, "lxc.net.0.ipvlan.isolation", "bridge", tmpf, true, "ipvlan")) {
+		lxc_error("%s\n", "lxc.net.0.ipvlan.isolation");
+		goto non_test_error;
+	}
+
+	if (set_get_compare_clear_save_load_network(c, "lxc.net.0.ipvlan.isolation", "private", tmpf, true, "ipvlan")) {
+		lxc_error("%s\n", "lxc.net.0.ipvlan.isolation");
+		goto non_test_error;
+	}
+
+	if (set_get_compare_clear_save_load_network(c, "lxc.net.0.ipvlan.isolation", "vepa", tmpf, true, "ipvlan")) {
+		lxc_error("%s\n", "lxc.net.0.ipvlan.isolation");
+		goto non_test_error;
+	}
+
 	if (set_get_compare_clear_save_load_network(c, "lxc.net.0.veth.pair", "clusterfuck", tmpf, true, "veth")) {
 		lxc_error("%s\n", "lxc.net.0.veth.pair");
 		goto non_test_error;

From 98895fe49661525a8cd622b47102aa50a423c138 Mon Sep 17 00:00:00 2001
From: tomponline <thomas.parrott at canonical.com>
Date: Wed, 1 May 2019 16:17:33 +0100
Subject: [PATCH 3/3] network: Adds ipvlan static routes for l2proxy mode

Signed-off-by: tomponline <thomas.parrott at canonical.com>
---
 src/lxc/network.c | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/lxc/network.c b/src/lxc/network.c
index d8d826b6f7..62553c2911 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -2781,18 +2781,35 @@ static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
 	struct lxc_inetdev *inet4dev;
 	struct lxc_inet6dev *inet6dev;
 	char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
+	int lo_ifindex, err;
 
 	/* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
 	if (!lxc_list_empty(&netdev->ipv6)) {
 		/* Check for net.ipv6.conf.[link].proxy_ndp=1 */
 		if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
-			ERROR("l2proxy requires sysctl net.ipv6.conf.%s.proxy_ndp be set to 1", netdev->link);
+			ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp be set to 1", netdev->link);
 			return minus_one_set_errno(EINVAL);
 		}
 
 		/* Check for net.ipv6.conf.[link].forwarding=1 */
 		if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
-			ERROR("l2proxy requires sysctl net.ipv6.conf.%s.forwarding be set to 1", netdev->link);
+			ERROR("Requires sysctl net.ipv6.conf.%s.forwarding be set to 1", netdev->link);
+			return minus_one_set_errno(EINVAL);
+		}
+	}
+
+	/* Perform IPVLAN specific checks. */
+	if (netdev->type == LXC_NET_IPVLAN) {
+		/* Check mode is l3s as other modes do not work with l2proxy. */
+		if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
+			ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
+			return minus_one_set_errno(EINVAL);
+		}
+
+		/* Retrieve local-loopback interface index for use with IPVLAN static routes. */
+		lo_ifindex = if_nametoindex("lo");
+		if (!lo_ifindex) {
+			ERROR("Failed to retrieve ifindex for \"lo\"");
 			return minus_one_set_errno(EINVAL);
 		}
 	}
@@ -2806,6 +2823,15 @@ static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
 		if (lxc_add_ip_proxy(bufinet4, netdev->link)) {
 			return minus_one_set_errno(EINVAL);
 		}
+
+		/* IPVLAN requires a route to local-loopback to trigger l2proxy. */
+		if (netdev->type == LXC_NET_IPVLAN) {
+			err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
+			if (err) {
+				ERROR("Failed to add ipv4 dest for network device \"lo\"");
+				return minus_one_set_errno(-err);
+			}
+		}
 	}
 
 	lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
@@ -2817,6 +2843,15 @@ static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
 		if (lxc_add_ip_proxy(bufinet6, netdev->link)) {
 			return minus_one_set_errno(EINVAL);
 		}
+
+		/* IPVLAN requires a route to local-loopback to trigger l2proxy. */
+		if (netdev->type == LXC_NET_IPVLAN) {
+			err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
+			if (err) {
+				ERROR("Failed to add ipv6 dest for network device \"lo\"");
+				return minus_one_set_errno(-err);
+			}
+		}
 	}
 
 	return 0;


More information about the lxc-devel mailing list