[lxc-devel] [lxd/master] Device Proxy
tomponline on Github
lxc-bot at linuxcontainers.org
Thu Jul 25 11:19:18 UTC 2019
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 358 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190725/9d05b12a/attachment-0001.bin>
-------------- next part --------------
From ad4fd989cb51b73c8ef6ff0b855058a06c48a9ab Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 15:33:59 +0100
Subject: [PATCH 01/32] dnsmasq: Adds RemoveStaticEntry function
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/dnsmasq/dnsmasq.go | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/lxd/dnsmasq/dnsmasq.go b/lxd/dnsmasq/dnsmasq.go
index 404e1d6821..aaeeddd3e9 100644
--- a/lxd/dnsmasq/dnsmasq.go
+++ b/lxd/dnsmasq/dnsmasq.go
@@ -34,8 +34,8 @@ type DHCPAllocation struct {
// ConfigMutex used to coordinate access to the dnsmasq config files.
var ConfigMutex sync.Mutex
-// UpdateStaticEntry writes a single dhcp-host line for a container/network combination.
-func UpdateStaticEntry(network string, projectName string, cName string, netConfig map[string]string, hwaddr string, ipv4Address string, ipv6Address string) error {
+// UpdateStaticEntry writes a single dhcp-host line for a network/instance combination.
+func UpdateStaticEntry(network string, projectName string, instanceName string, netConfig map[string]string, hwaddr string, ipv4Address string, ipv6Address string) error {
line := hwaddr
// Generate the dhcp-host line
@@ -48,14 +48,24 @@ func UpdateStaticEntry(network string, projectName string, cName string, netConf
}
if netConfig["dns.mode"] == "" || netConfig["dns.mode"] == "managed" {
- line += fmt.Sprintf(",%s", cName)
+ line += fmt.Sprintf(",%s", instanceName)
}
if line == hwaddr {
return nil
}
- err := ioutil.WriteFile(shared.VarPath("networks", network, "dnsmasq.hosts", project.Prefix(projectName, cName)), []byte(line+"\n"), 0644)
+ err := ioutil.WriteFile(shared.VarPath("networks", network, "dnsmasq.hosts", project.Prefix(projectName, instanceName)), []byte(line+"\n"), 0644)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// RemoveStaticEntry removes a single dhcp-host line for a network/instance combination.
+func RemoveStaticEntry(network string, projectName string, instanceName string) error {
+ err := os.Remove(shared.VarPath("networks", network, "dnsmasq.hosts", project.Prefix(projectName, instanceName)))
if err != nil {
return err
}
From 90f18fef57f7535b54f0e932df77b46c3d3114f5 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 25 Jul 2019 11:39:32 +0100
Subject: [PATCH 02/32] dnsmasq: Removes RebuildConfig function link to
networkUpdateStatic
No longer required in device package.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/dnsmasq/dnsmasq.go | 4 ----
lxd/networks_utils.go | 5 -----
2 files changed, 9 deletions(-)
diff --git a/lxd/dnsmasq/dnsmasq.go b/lxd/dnsmasq/dnsmasq.go
index aaeeddd3e9..e5bf9fb5b4 100644
--- a/lxd/dnsmasq/dnsmasq.go
+++ b/lxd/dnsmasq/dnsmasq.go
@@ -13,16 +13,12 @@ import (
"sync"
"github.com/lxc/lxd/lxd/project"
- "github.com/lxc/lxd/lxd/state"
"github.com/lxc/lxd/shared"
"github.com/lxc/lxd/shared/version"
"golang.org/x/sys/unix"
)
-// RebuildConfig function to completely rebuild dnsmasq config for a network.
-var RebuildConfig func(s *state.State, networkName string) error
-
// DHCPAllocation represents an IP allocation from dnsmasq.
type DHCPAllocation struct {
IP net.IP
diff --git a/lxd/networks_utils.go b/lxd/networks_utils.go
index ff2d79d650..bdd39284ff 100644
--- a/lxd/networks_utils.go
+++ b/lxd/networks_utils.go
@@ -38,11 +38,6 @@ import (
var forkdnsServersLock sync.Mutex
-func init() {
- // Link the networkUpdateStatic in here to the dnsmasq package so that other packages can use it.
- dnsmasq.RebuildConfig = networkUpdateStatic
-}
-
func networkAutoAttach(cluster *db.Cluster, devName string) error {
_, dbInfo, err := cluster.NetworkGetInterface(devName)
if err != nil {
From ae6a8a0cad32a819e4095c7eb999582711b20b4f Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 09:50:44 +0100
Subject: [PATCH 03/32] device/runconfig: Adds the struct types returned when a
device is started
These types contain runtime config used to configure LXC and LXD with the device.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_runconfig.go | 14 ++++++++++++++
1 file changed, 14 insertions(+)
create mode 100644 lxd/device/device_runconfig.go
diff --git a/lxd/device/device_runconfig.go b/lxd/device/device_runconfig.go
new file mode 100644
index 0000000000..b3e8eed2b5
--- /dev/null
+++ b/lxd/device/device_runconfig.go
@@ -0,0 +1,14 @@
+package device
+
+// RunConfigItem represents a single config item.
+type RunConfigItem struct {
+ Key string
+ Value string
+}
+
+// RunConfig represents LXD defined run-time config used for device setup.
+type RunConfig struct {
+ NetworkInterfaces [][]RunConfigItem
+ Mounts []map[string]string
+ Cgroups []map[string]string
+}
From 3bfc8fce7b04d495d7fedeb478084f632ba4717e Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 09:52:21 +0100
Subject: [PATCH 04/32] device: Adds device interface and common device
implementation
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device.go | 128 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 128 insertions(+)
create mode 100644 lxd/device/device.go
diff --git a/lxd/device/device.go b/lxd/device/device.go
new file mode 100644
index 0000000000..4aa8b2d726
--- /dev/null
+++ b/lxd/device/device.go
@@ -0,0 +1,128 @@
+package device
+
+import (
+ "fmt"
+
+ "github.com/lxc/lxd/lxd/device/config"
+ "github.com/lxc/lxd/lxd/state"
+)
+
+// devTypes defines supported top-level device type creation functions.
+var devTypes = map[string]func(config.Device) device{
+ "nic": loadNICByType,
+}
+
+// ErrUnsupportedDevType is the error that occurs when an unsupported device type is created.
+var ErrUnsupportedDevType = fmt.Errorf("Unsupported device type")
+
+// Device represents a device that can be added to an instance.
+type Device interface {
+ // CanHotPlug returns true if device can be managed whilst instance is running.
+ // It also returns a slice of config fields that can be live updated. If only fields in this
+ // list have changed then Update() is called rather than triggering a device remove & add.
+ CanHotPlug() (bool, []string)
+
+ // Add performs any host-side setup when a device is added to an instance.
+ // It is called irrespective of whether the instance is running or not.
+ Add() error
+
+ // Start performs any host-side configuration required to start the device for the instance.
+ // This can be when a device is plugged into a running instance or the instance is starting.
+ // Returns run-time configuration needed for configuring the instance with the new device.
+ Start() (*RunConfig, error)
+
+ // Update performs host-side modifications for a device based on the difference between the
+ // current config and previous config supplied as an argument. This is called if the only
+ // config fields that have changed are supplied in the list returned from CanHotPlug().
+ // The function also accepts a boolean indicating whether the instance is running or not.
+ Update(config.Device, bool) error
+
+ // Stop performs any host-side cleanup required when a device is removed from an instance,
+ // either due to unplugging it from a running instance or the instance being shut down.
+ Stop() error
+
+ // Remove performs any host-side cleanup when a device is removed from an instance.
+ Remove() error
+}
+
+// device represents a sealed interface that implements Device, but also contains some internal
+// setup functions for a Device that should only be called by device.New() to avoid exposing devices
+// that are not in a known configured state. This is separate from the Device interface so that
+// Devices created outside of the device package can be used by LXD, but ensures that any devices
+// created by the device package will only be accessible after being configured properly by New().
+type device interface {
+ Device
+
+ // init stores the InstanceIdentifier, daemon State and Config into device and performs any setup.
+ init(InstanceIdentifier, *state.State, config.Device, func() map[string]string, func(map[string]string) error)
+
+ // validate checks Config stored by init() is valid for the instance type.
+ validate() error
+}
+
+// deviceCommon represents the common struct for all devices.
+type deviceCommon struct {
+ instance InstanceIdentifier
+ config map[string]string
+ state *state.State
+ volatileGet func() map[string]string
+ volatileSet func(map[string]string) error
+}
+
+// init stores the InstanceIdentifier, daemon state and Config into device. It can also be provided
+// with volatile get and set functions for the device to allow persistent data to be accessed.
+// This is implemented as part of deviceCommon so that the majority of devices don't need to
+// implement it and can just embed deviceCommon.
+func (d *deviceCommon) init(instance InstanceIdentifier, state *state.State, conf config.Device, volatileGet func() map[string]string, volatileSet func(map[string]string) error) {
+ d.instance = instance
+ d.config = conf
+ d.state = state
+ d.volatileGet = volatileGet
+ d.volatileSet = volatileSet
+}
+
+// Add returns nil error as majority of devices don't need to do any host-side setup.
+func (d *deviceCommon) Add() error {
+ return nil
+}
+
+// CanHotPlug returns true as majority of devices can be started/stopped when instance is running.
+// Also returns an empty list of update fields as most devices do not support live updates.
+func (d *deviceCommon) CanHotPlug() (bool, []string) {
+ return true, []string{}
+}
+
+// Update returns an error as most devices do not support live updates without being restarted.
+func (d *deviceCommon) Update(oldConfig config.Device, isRunning bool) error {
+ return fmt.Errorf("Device does not support updates whilst started")
+}
+
+// Remove returns nil error as majority of devices don't need to do any host-side cleanup on delete.
+func (d *deviceCommon) Remove() error {
+ return nil
+}
+
+// New instantiates a new device struct, validates the supplied config and sets it into the device.
+func New(instance InstanceIdentifier, state *state.State, conf config.Device, volatileGet func() map[string]string, volatileSet func(map[string]string) error) (Device, error) {
+ devFunc := devTypes[conf["type"]]
+
+ // Check if the top-level type is recognised; a known type maps to a device creation function.
+ if devFunc == nil {
+ return nil, ErrUnsupportedDevType
+ }
+
+ // Run the device create function and check it succeeds.
+ dev := devFunc(conf)
+ if dev == nil {
+ return nil, ErrUnsupportedDevType
+ }
+
+ // Init the device and run validation of supplied config.
+ dev.init(instance, state, conf, volatileGet, volatileSet)
+ err := dev.validate()
+ if err != nil {
+ return nil, err
+ }
+
+ return dev, nil
+}
From 849ad0b14a0c681bbe19f411da42a92b34facc86 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 09:53:32 +0100
Subject: [PATCH 05/32] device/instance/id: Adds functions to interface to
expose config
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_instance_id.go | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/lxd/device/device_instance_id.go b/lxd/device/device_instance_id.go
index c65c4eb774..e885f98e3a 100644
--- a/lxd/device/device_instance_id.go
+++ b/lxd/device/device_instance_id.go
@@ -1,8 +1,14 @@
package device
+import (
+ "github.com/lxc/lxd/lxd/device/config"
+)
+
// InstanceIdentifier is an interface that allows us to identify an Instance and its properties.
type InstanceIdentifier interface {
Name() string
Type() string
Project() string
+ ExpandedConfig() map[string]string
+ ExpandedDevices() config.Devices
}
From 9dbd6f4af34e8275a7da0668466662c85f0933c1 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 09:54:26 +0100
Subject: [PATCH 06/32] api/internal: Removes OnNetworkUp hook command
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/api_internal.go | 27 ---------------------------
lxd/container.go | 1 -
2 files changed, 28 deletions(-)
diff --git a/lxd/api_internal.go b/lxd/api_internal.go
index 69e2dd323d..fee4d9ebfa 100644
--- a/lxd/api_internal.go
+++ b/lxd/api_internal.go
@@ -34,7 +34,6 @@ var apiInternal = []APIEndpoint{
internalReadyCmd,
internalShutdownCmd,
internalContainerOnStartCmd,
- internalContainerOnNetworkUpCmd,
internalContainerOnStopNSCmd,
internalContainerOnStopCmd,
internalContainersCmd,
@@ -77,12 +76,6 @@ var internalContainerOnStopCmd = APIEndpoint{
Get: APIEndpointAction{Handler: internalContainerOnStop},
}
-var internalContainerOnNetworkUpCmd = APIEndpoint{
- Name: "containers/{id}/onnetwork-up",
-
- Get: APIEndpointAction{Handler: internalContainerOnNetworkUp},
-}
-
var internalSQLCmd = APIEndpoint{
Name: "sql",
@@ -195,26 +188,6 @@ func internalContainerOnStop(d *Daemon, r *http.Request) Response {
return EmptySyncResponse
}
-func internalContainerOnNetworkUp(d *Daemon, r *http.Request) Response {
- id, err := strconv.Atoi(mux.Vars(r)["id"])
- if err != nil {
- return SmartError(err)
- }
-
- c, err := containerLoadById(d.State(), id)
- if err != nil {
- return SmartError(err)
- }
-
- err = c.OnNetworkUp(queryParam(r, "device"), queryParam(r, "host_name"))
- if err != nil {
- logger.Error("The network up script failed", log.Ctx{"container": c.Name(), "err": err})
- return SmartError(err)
- }
-
- return EmptySyncResponse
-}
-
type internalSQLDump struct {
Text string `json:"text" yaml:"text"`
}
diff --git a/lxd/container.go b/lxd/container.go
index 4d4df4b91d..3da50af350 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -705,7 +705,6 @@ type container interface {
OnStart() error
OnStopNS(target string, netns string) error
OnStop(target string) error
- OnNetworkUp(deviceName string, hostVeth string) error
// Properties
Id() int
From d252e154b2737eea72827246bee3c0d7f006fcbb Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 09:54:55 +0100
Subject: [PATCH 07/32] config/devices: Removes old static update fields list
This is replaced with a per-device type update fields list.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/config/devices.go | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/lxd/device/config/devices.go b/lxd/device/config/devices.go
index b4842cc813..9bd7bad88c 100644
--- a/lxd/device/config/devices.go
+++ b/lxd/device/config/devices.go
@@ -26,7 +26,7 @@ func (list Devices) Contains(k string, d Device) bool {
}
// Update returns the difference between two sets
-func (list Devices) Update(newlist Devices) (map[string]Device, map[string]Device, map[string]Device, []string) {
+func (list Devices) Update(newlist Devices, updateFields func(Device, Device) []string) (map[string]Device, map[string]Device, map[string]Device, []string) {
rmlist := map[string]Device{}
addlist := map[string]Device{}
updatelist := map[string]Device{}
@@ -60,8 +60,7 @@ func (list Devices) Update(newlist Devices) (map[string]Device, map[string]Devic
}
updateDiff = deviceEqualsDiffKeys(oldDevice, newDevice)
-
- for _, k := range []string{"limits.max", "limits.read", "limits.write", "limits.egress", "limits.ingress", "ipv4.address", "ipv6.address", "ipv4.routes", "ipv6.routes"} {
+ for _, k := range updateFields(oldDevice, newDevice) {
delete(oldDevice, k)
delete(newDevice, k)
}
From f0e94b593e2b6c462a85a7c5d63df3589a044416 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:00:34 +0100
Subject: [PATCH 08/32] container: Adds Type() function to interface
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container.go | 1 +
1 file changed, 1 insertion(+)
diff --git a/lxd/container.go b/lxd/container.go
index 3da50af350..65ece52bfc 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -711,6 +711,7 @@ type container interface {
Location() string
Project() string
Name() string
+ Type() string
Description() string
Architecture() int
CreationDate() time.Time
From 575e65ff828a75145e7f6ce5510a4902c186d0c5 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:01:24 +0100
Subject: [PATCH 09/32] container: Adds InstanceLoadNodeAll link to device
package
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container.go | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/lxd/container.go b/lxd/container.go
index 65ece52bfc..8e61be6a36 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -18,6 +18,7 @@ import (
"github.com/flosch/pongo2"
"github.com/lxc/lxd/lxd/cluster"
"github.com/lxc/lxd/lxd/db"
+ "github.com/lxc/lxd/lxd/device"
"github.com/lxc/lxd/lxd/device/config"
"github.com/lxc/lxd/lxd/state"
"github.com/lxc/lxd/lxd/sys"
@@ -32,6 +33,24 @@ import (
"github.com/lxc/lxd/shared/units"
)
+func init() {
+ // Expose containerLoadNodeAll to the device package converting the response to a slice of InstanceIdentifiers.
+ // This is because container types are defined in the main package and are not importable.
+ device.InstanceLoadNodeAll = func(s *state.State) ([]device.InstanceIdentifier, error) {
+ containers, err := containerLoadNodeAll(s)
+ if err != nil {
+ return nil, err
+ }
+
+ instances := []device.InstanceIdentifier{}
+ for _, v := range containers {
+ instances = append(instances, device.InstanceIdentifier(v))
+ }
+
+ return instances, nil
+ }
+}
+
// Helper functions
// Returns the parent container name, snapshot name, and whether it actually was
From 97f645693de0e1944ce61cbe4c337f408fc47080 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:03:47 +0100
Subject: [PATCH 10/32] device/utils: Adds InstanceLoadNodeAll var
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_instance.go | 8 ++++++++
lxd/device/{device_utils.go => device_utils_network.go} | 0
2 files changed, 8 insertions(+)
create mode 100644 lxd/device/device_utils_instance.go
rename lxd/device/{device_utils.go => device_utils_network.go} (100%)
diff --git a/lxd/device/device_utils_instance.go b/lxd/device/device_utils_instance.go
new file mode 100644
index 0000000000..52226afb4f
--- /dev/null
+++ b/lxd/device/device_utils_instance.go
@@ -0,0 +1,8 @@
+package device
+
+import (
+ "github.com/lxc/lxd/lxd/state"
+)
+
+// InstanceLoadNodeAll returns all local instance configs.
+var InstanceLoadNodeAll func(s *state.State) ([]InstanceIdentifier, error)
diff --git a/lxd/device/device_utils.go b/lxd/device/device_utils_network.go
similarity index 100%
rename from lxd/device/device_utils.go
rename to lxd/device/device_utils_network.go
From 027e5f98bce2384866708af168d3134440169642 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:04:25 +0100
Subject: [PATCH 11/32] device/utils: Brings VLAN parent interface up
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_network.go | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index 58954dd65e..493760580d 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -147,16 +147,23 @@ func NetworkRemoveInterface(nic string) error {
}
// NetworkCreateVlanDeviceIfNeeded creates a VLAN device if doesn't already exist.
-func NetworkCreateVlanDeviceIfNeeded(parent string, hostName string, vlan string) (bool, error) {
- if vlan != "" {
- if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", hostName)) {
- _, err := shared.RunCommand("ip", "link", "add", "link", parent, "name", hostName, "up", "type", "vlan", "id", vlan)
+func NetworkCreateVlanDeviceIfNeeded(parent string, vlanDevice string, vlanID string) (bool, error) {
+ if vlanID != "" {
+ if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", vlanDevice)) {
+ // Bring the parent interface up so we can add a vlan to it.
+ _, err := shared.RunCommand("ip", "link", "set", "dev", parent, "up")
+ if err != nil {
+ return false, fmt.Errorf("Failed to bring up parent %s: %v", parent, err)
+ }
+
+ // Add VLAN interface on top of parent.
+ _, err = shared.RunCommand("ip", "link", "add", "link", parent, "name", vlanDevice, "up", "type", "vlan", "id", vlanID)
if err != nil {
return false, err
}
// Attempt to disable IPv6 router advertisement acceptance
- NetworkSysctlSet(fmt.Sprintf("ipv6/conf/%s/accept_ra", hostName), "0")
+ NetworkSysctlSet(fmt.Sprintf("ipv6/conf/%s/accept_ra", vlanDevice), "0")
// We created a new vlan interface, return true
return true, nil
From 60ff51a5f2bbdbacb87673d7c5c09998535038f4 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:05:35 +0100
Subject: [PATCH 12/32] device/utils: Adds veth management functions
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_network.go | 236 +++++++++++++++++++++++++++++
1 file changed, 236 insertions(+)
diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index 493760580d..631a98d4f9 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -10,7 +10,10 @@ import (
"strconv"
"strings"
+ "github.com/lxc/lxd/lxd/device/config"
"github.com/lxc/lxd/shared"
+ "github.com/lxc/lxd/shared/logger"
+ "github.com/lxc/lxd/shared/units"
)
// NetworkSysctlGet retrieves the value of a sysctl file in /proc/sys/net.
@@ -262,3 +265,236 @@ func NetworkAttachInterface(netName string, devName string) error {
return nil
}
+
+// networkCreateVethPair creates and configures a veth pair. It accepts the name of the host side
+// interface as a parameter and returns the peer interface name.
+func networkCreateVethPair(hostName string, m config.Device) (string, error) {
+ peerName := NetworkRandomDevName("veth")
+
+ _, err := shared.RunCommand("ip", "link", "add", "dev", hostName, "type", "veth", "peer", "name", peerName)
+ if err != nil {
+ return "", fmt.Errorf("Failed to create the veth interfaces %s and %s: %s", hostName, peerName, err)
+ }
+
+ _, err = shared.RunCommand("ip", "link", "set", "dev", hostName, "up")
+ if err != nil {
+ NetworkRemoveInterface(hostName)
+ return "", fmt.Errorf("Failed to bring up the veth interface %s: %s", hostName, err)
+ }
+
+ // Set the MAC address on peer.
+ if m["hwaddr"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", peerName, "address", m["hwaddr"])
+ if err != nil {
+ NetworkRemoveInterface(peerName)
+ return "", fmt.Errorf("Failed to set the MAC address: %s", err)
+ }
+ }
+
+ // Set the MTU on peer.
+ if m["mtu"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", peerName, "mtu", m["mtu"])
+ if err != nil {
+ NetworkRemoveInterface(peerName)
+ return "", fmt.Errorf("Failed to set the MTU: %s", err)
+ }
+ }
+
+ return peerName, nil
+}
+
+// networkSetupHostVethDevice configures a nic device's host side veth settings.
+func networkSetupHostVethDevice(device config.Device, oldDevice config.Device, v map[string]string) error {
+ // If not configured, check if volatile data contains the most recently added host_name.
+ if device["host_name"] == "" {
+ device["host_name"] = v["host_name"]
+ }
+
+ // If not configured, check if volatile data contains the most recently added hwaddr.
+ if device["hwaddr"] == "" {
+ device["hwaddr"] = v["hwaddr"]
+ }
+
+ // Check whether host device resolution succeeded.
+ if device["host_name"] == "" {
+ return fmt.Errorf("Failed to find host side veth name for device \"%s\"", device["name"])
+ }
+
+ // Refresh tc limits.
+ err := networkSetVethLimits(device)
+ if err != nil {
+ return err
+ }
+
+ // If oldDevice provided, remove old routes if any remain.
+ if oldDevice != nil {
+ // If not configured, copy the volatile host_name into old device to support live updates.
+ if oldDevice["host_name"] == "" {
+ oldDevice["host_name"] = v["host_name"]
+ }
+
+ // If not configured, copy the volatile hwaddr into old device to support live updates.
+ if oldDevice["hwaddr"] == "" {
+ oldDevice["hwaddr"] = v["hwaddr"]
+ }
+
+ networkRemoveVethRoutes(oldDevice)
+ }
+
+ // Setup static routes to container.
+ err = networkSetVethRoutes(device)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// networkSetVethRoutes applies any static routes configured from the host to the container nic.
+func networkSetVethRoutes(m config.Device) error {
+ // Decide whether the route should point to the veth parent or the bridge parent.
+ routeDev := m["host_name"]
+ if m["nictype"] == "bridged" {
+ routeDev = m["parent"]
+ }
+
+ if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", routeDev)) {
+ return fmt.Errorf("Unknown or missing host side route interface: %s", routeDev)
+ }
+
+ // Add additional IPv4 routes (using boot proto to avoid conflicts with network static routes)
+ if m["ipv4.routes"] != "" {
+ for _, route := range strings.Split(m["ipv4.routes"], ",") {
+ route = strings.TrimSpace(route)
+ _, err := shared.RunCommand("ip", "-4", "route", "add", route, "dev", routeDev, "proto", "boot")
+ if err != nil {
+ return err
+ }
+ }
+ }
+
+ // Add additional IPv6 routes (using boot proto to avoid conflicts with network static routes)
+ if m["ipv6.routes"] != "" {
+ for _, route := range strings.Split(m["ipv6.routes"], ",") {
+ route = strings.TrimSpace(route)
+ _, err := shared.RunCommand("ip", "-6", "route", "add", route, "dev", routeDev, "proto", "boot")
+ if err != nil {
+ return err
+ }
+ }
+ }
+
+ return nil
+}
+
+// networkRemoveVethRoutes removes any routes created for this device on the host that were first added
+// with networkSetVethRoutes(). Expects to be passed the device config from the oldExpandedDevices.
+func networkRemoveVethRoutes(m config.Device) {
+ // Decide whether the route should point to the veth parent or the bridge parent
+ routeDev := m["host_name"]
+ if m["nictype"] == "bridged" {
+ routeDev = m["parent"]
+ }
+
+ if m["ipv4.routes"] != "" || m["ipv6.routes"] != "" {
+ if routeDev == "" {
+ logger.Errorf("Failed to remove static routes as route dev isn't set")
+ return
+ }
+
+ if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", routeDev)) {
+ return //Routes will already be gone if device doesn't exist.
+ }
+ }
+
+ // Remove IPv4 routes
+ if m["ipv4.routes"] != "" {
+ for _, route := range strings.Split(m["ipv4.routes"], ",") {
+ route = strings.TrimSpace(route)
+ _, err := shared.RunCommand("ip", "-4", "route", "flush", route, "dev", routeDev, "proto", "boot")
+ if err != nil {
+ logger.Errorf("Failed to remove static route: %s to %s: %s", route, routeDev, err)
+ }
+ }
+ }
+
+ // Remove IPv6 routes
+ if m["ipv6.routes"] != "" {
+ for _, route := range strings.Split(m["ipv6.routes"], ",") {
+ route = strings.TrimSpace(route)
+ _, err := shared.RunCommand("ip", "-6", "route", "flush", route, "dev", routeDev, "proto", "boot")
+ if err != nil {
+ logger.Errorf("Failed to remove static route: %s to %s: %s", route, routeDev, err)
+ }
+ }
+ }
+}
+
+// networkSetVethLimits applies any network rate limits to the veth device specified in the config.
+func networkSetVethLimits(m config.Device) error {
+ var err error
+
+ veth := m["host_name"]
+ if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", veth)) {
+ return fmt.Errorf("Unknown or missing host side veth: %s", veth)
+ }
+
+ // Apply max limit
+ if m["limits.max"] != "" {
+ m["limits.ingress"] = m["limits.max"]
+ m["limits.egress"] = m["limits.max"]
+ }
+
+ // Parse the values
+ var ingressInt int64
+ if m["limits.ingress"] != "" {
+ ingressInt, err = units.ParseBitSizeString(m["limits.ingress"])
+ if err != nil {
+ return err
+ }
+ }
+
+ var egressInt int64
+ if m["limits.egress"] != "" {
+ egressInt, err = units.ParseBitSizeString(m["limits.egress"])
+ if err != nil {
+ return err
+ }
+ }
+
+ // Clean any existing entry
+ shared.RunCommand("tc", "qdisc", "del", "dev", veth, "root")
+ shared.RunCommand("tc", "qdisc", "del", "dev", veth, "ingress")
+
+ // Apply new limits
+ if m["limits.ingress"] != "" {
+ out, err := shared.RunCommand("tc", "qdisc", "add", "dev", veth, "root", "handle", "1:0", "htb", "default", "10")
+ if err != nil {
+ return fmt.Errorf("Failed to create root tc qdisc: %s", out)
+ }
+
+ out, err = shared.RunCommand("tc", "class", "add", "dev", veth, "parent", "1:0", "classid", "1:10", "htb", "rate", fmt.Sprintf("%dbit", ingressInt))
+ if err != nil {
+ return fmt.Errorf("Failed to create limit tc class: %s", out)
+ }
+
+ out, err = shared.RunCommand("tc", "filter", "add", "dev", veth, "parent", "1:0", "protocol", "all", "u32", "match", "u32", "0", "0", "flowid", "1:1")
+ if err != nil {
+ return fmt.Errorf("Failed to create tc filter: %s", out)
+ }
+ }
+
+ if m["limits.egress"] != "" {
+ out, err := shared.RunCommand("tc", "qdisc", "add", "dev", veth, "handle", "ffff:0", "ingress")
+ if err != nil {
+ return fmt.Errorf("Failed to create ingress tc qdisc: %s", out)
+ }
+
+ out, err = shared.RunCommand("tc", "filter", "add", "dev", veth, "parent", "ffff:0", "protocol", "all", "u32", "match", "u32", "0", "0", "police", "rate", fmt.Sprintf("%dbit", egressInt), "burst", "1024k", "mtu", "64kb", "drop")
+ if err != nil {
+ return fmt.Errorf("Failed to create ingress tc qdisc: %s", out)
+ }
+ }
+
+ return nil
+}
From 7180b25e7fa8af1afa40b1d805f9cd720e671f6a Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:08:30 +0100
Subject: [PATCH 13/32] container/lxc: Adds Type function
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container_lxc.go | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 003b453e9c..1a1d2db062 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -34,6 +34,7 @@ import (
"github.com/lxc/lxd/lxd/device"
"github.com/lxc/lxd/lxd/device/config"
"github.com/lxc/lxd/lxd/dnsmasq"
+ "github.com/lxc/lxd/lxd/instance"
"github.com/lxc/lxd/lxd/iptables"
"github.com/lxc/lxd/lxd/maas"
"github.com/lxc/lxd/lxd/project"
@@ -627,6 +628,10 @@ type containerLXC struct {
expiryDate time.Time
}
+func (c *containerLXC) Type() string {
+ return instance.TypeContainer
+}
+
func (c *containerLXC) createOperation(action string, reusable bool, reuse bool) (*lxcContainerOperation, error) {
op, _ := c.getOperation("")
if op != nil {
From 63458baac68c836e67ab870b3a6ba771e32acc25 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:09:29 +0100
Subject: [PATCH 14/32] device/nic: Adds NIC device loader and map
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic.go | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
create mode 100644 lxd/device/nic.go
diff --git a/lxd/device/nic.go b/lxd/device/nic.go
new file mode 100644
index 0000000000..3297ec620d
--- /dev/null
+++ b/lxd/device/nic.go
@@ -0,0 +1,23 @@
+package device
+
+import (
+ "github.com/lxc/lxd/lxd/device/config"
+)
+
+// nicTypes defines the supported nic type devices and defines their creation functions.
+// Each key is a "nictype" config value and each function returns a pointer to a new, empty
+// device struct of the matching implementation.
+var nicTypes = map[string]func() device{
+ "physical": func() device { return &nicPhysical{} },
+ "ipvlan": func() device { return &nicIPVLAN{} },
+ "p2p": func() device { return &nicP2P{} },
+ "bridged": func() device { return &nicBridged{} },
+ "macvlan": func() device { return &nicMACVLAN{} },
+ "sriov": func() device { return &nicSRIOV{} },
+}
+
+// loadNICByType looks up the creation function registered in nicTypes for the config's
+// "nictype" value and returns the device it creates, or nil when that nictype is not registered.
+func loadNICByType(c config.Device) device {
+ newNIC := nicTypes[c["nictype"]]
+ if newNIC == nil {
+ return nil
+ }
+
+ return newNIC()
+}
From 3ed5805593de8c96cf800e42f6ab1dcbf0ecdb02 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:10:28 +0100
Subject: [PATCH 15/32] device/nic/ipvlan: Adds IPVLAN NIC device
implementation
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_ipvlan.go | 204 ++++++++++++++++++++
test/suites/container_devices_nic_ipvlan.sh | 30 +++
2 files changed, 234 insertions(+)
create mode 100644 lxd/device/nic_ipvlan.go
diff --git a/lxd/device/nic_ipvlan.go b/lxd/device/nic_ipvlan.go
new file mode 100644
index 0000000000..14bc53888c
--- /dev/null
+++ b/lxd/device/nic_ipvlan.go
@@ -0,0 +1,204 @@
+package device
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/lxc/lxd/lxd/instance"
+ "github.com/lxc/lxd/shared"
+)
+
+// nicIPVLAN is the NIC device implementation for the "ipvlan" nictype.
+type nicIPVLAN struct {
+ deviceCommon
+}
+
+// CanHotPlug returns whether the device can be managed whilst the instance is running, it also
+// returns a list of fields that can be updated without triggering a device remove & add.
+// For IPVLAN it reports false and an empty field list.
+func (d *nicIPVLAN) CanHotPlug() (bool, []string) {
+ return false, []string{}
+}
+
+// validate checks the supplied config for correctness.
+// It requires a container instance, liblxc support for the network_ipvlan, network_l2proxy and
+// network_gateway_device_route extensions, and a parent property. For each address family that
+// has addresses configured, the parent's forwarding sysctl (plus proxy_ndp for IPv6) must read
+// "1" and the address list must pass validation. Unknown config keys are rejected.
+func (d *nicIPVLAN) validate() error {
+ if d.instance.Type() != instance.TypeContainer {
+ return ErrUnsupportedDevType
+ }
+
+ extensions := d.state.OS.LXCFeatures
+ if !extensions["network_ipvlan"] || !extensions["network_l2proxy"] || !extensions["network_gateway_device_route"] {
+ return fmt.Errorf("Requires liblxc has following API extensions: network_ipvlan, network_l2proxy, network_gateway_device_route")
+ }
+
+ if d.config["parent"] == "" {
+ return fmt.Errorf("Requires parent property")
+ }
+
+ if d.config["ipv4.address"] != "" {
+ // Check necessary sysctls are configured for use with l2proxy parent in IPVLAN l3s mode.
+ ipv4FwdPath := fmt.Sprintf("ipv4/conf/%s/forwarding", d.config["parent"])
+ sysctlVal, err := NetworkSysctlGet(ipv4FwdPath)
+ if err != nil {
+ // Only a failed read is reported here; a wrong value gets its own message below.
+ return fmt.Errorf("Error reading net sysctl %s: %v", ipv4FwdPath, err)
+ }
+ if sysctlVal != "1\n" {
+ return fmt.Errorf("IPVLAN in L3S mode requires sysctl net.ipv4.conf.%s.forwarding=1", d.config["parent"])
+ }
+
+ err = NetworkValidAddressV4List(d.config["ipv4.address"])
+ if err != nil {
+ return err
+ }
+ }
+
+ if d.config["ipv6.address"] != "" {
+ // Check necessary sysctls are configured for use with l2proxy parent in IPVLAN l3s mode.
+ ipv6FwdPath := fmt.Sprintf("ipv6/conf/%s/forwarding", d.config["parent"])
+ sysctlVal, err := NetworkSysctlGet(ipv6FwdPath)
+ if err != nil {
+ return fmt.Errorf("Error reading net sysctl %s: %v", ipv6FwdPath, err)
+ }
+ if sysctlVal != "1\n" {
+ return fmt.Errorf("IPVLAN in L3S mode requires sysctl net.ipv6.conf.%s.forwarding=1", d.config["parent"])
+ }
+
+ ipv6ProxyNdpPath := fmt.Sprintf("ipv6/conf/%s/proxy_ndp", d.config["parent"])
+ sysctlVal, err = NetworkSysctlGet(ipv6ProxyNdpPath)
+ if err != nil {
+ return fmt.Errorf("Error reading net sysctl %s: %v", ipv6ProxyNdpPath, err)
+ }
+ if sysctlVal != "1\n" {
+ return fmt.Errorf("IPVLAN in L3S mode requires sysctl net.ipv6.conf.%s.proxy_ndp=1", d.config["parent"])
+ }
+
+ err = NetworkValidAddressV6List(d.config["ipv6.address"])
+ if err != nil {
+ return err
+ }
+ }
+
+ // Reject any option that this NIC type does not support.
+ for k := range d.config {
+ if !shared.StringInSlice(k, []string{"type", "nictype", "parent", "name", "mtu", "hwaddr", "vlan", "ipv4.address", "ipv6.address"}) {
+ return fmt.Errorf("Invalid device option: %s", k)
+ }
+ }
+
+ return nil
+}
+
+// Start is run when the device is added to the container.
+// It creates the VLAN parent interface when one is needed (enabling the required sysctls on it),
+// records in volatile whether it was created, and returns the network interface config for an
+// IPVLAN NIC in l3s mode with l2proxy enabled. Returns an error if the name property is unset
+// or any setup step fails.
+func (d *nicIPVLAN) Start() (*RunConfig, error) {
+ if d.config["name"] == "" {
+ return nil, fmt.Errorf("Requires name property to start")
+ }
+
+ saveData := make(map[string]string)
+
+ // Decide which parent we should use based on VLAN setting.
+ parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+
+ createdDev, err := NetworkCreateVlanDeviceIfNeeded(d.config["parent"], parentName, d.config["vlan"])
+ if err != nil {
+ return nil, err
+ }
+
+ // Record whether we created this device or not so it can be removed on stop.
+ saveData["last_state.created"] = fmt.Sprintf("%t", createdDev)
+
+ // If we created a VLAN interface, we need to setup the sysctls on that interface.
+ if createdDev {
+ err := d.setupParentSysctls(parentName)
+ if err != nil {
+ // The creation has not been saved to volatile yet, so Stop would never remove this
+ // interface. Remove it now (best effort) so a failed start does not leak it.
+ NetworkRemoveInterface(parentName)
+ return nil, err
+ }
+ }
+
+ err = d.volatileSet(saveData)
+ if err != nil {
+ return nil, err
+ }
+
+ runConf := RunConfig{}
+ nic := []RunConfigItem{
+ {Key: "name", Value: d.config["name"]},
+ {Key: "type", Value: "ipvlan"},
+ {Key: "flags", Value: "up"},
+ {Key: "ipvlan.mode", Value: "l3s"},
+ {Key: "ipvlan.isolation", Value: "bridge"},
+ {Key: "l2proxy", Value: "1"},
+ {Key: "link", Value: parentName},
+ }
+
+ if d.config["mtu"] != "" {
+ nic = append(nic, RunConfigItem{Key: "mtu", Value: d.config["mtu"]})
+ }
+
+ // Each configured IPv4 address is added as a /32, with the gateway set to "dev".
+ if d.config["ipv4.address"] != "" {
+ for _, addr := range strings.Split(d.config["ipv4.address"], ",") {
+ addr = strings.TrimSpace(addr)
+ nic = append(nic, RunConfigItem{Key: "ipv4.address", Value: fmt.Sprintf("%s/32", addr)})
+ }
+
+ nic = append(nic, RunConfigItem{Key: "ipv4.gateway", Value: "dev"})
+ }
+
+ // Each configured IPv6 address is added as a /128, with the gateway set to "dev".
+ if d.config["ipv6.address"] != "" {
+ for _, addr := range strings.Split(d.config["ipv6.address"], ",") {
+ addr = strings.TrimSpace(addr)
+ nic = append(nic, RunConfigItem{Key: "ipv6.address", Value: fmt.Sprintf("%s/128", addr)})
+ }
+
+ nic = append(nic, RunConfigItem{Key: "ipv6.gateway", Value: "dev"})
+ }
+
+ runConf.NetworkInterfaces = append(runConf.NetworkInterfaces, nic)
+ return &runConf, nil
+}
+
+// setupParentSysctls configures the required sysctls on the parent to allow l2proxy to work.
+// Because of our policy not to modify sysctls on existing interfaces, this should only be called
+// if we created the parent interface.
+// IPv4 forwarding is enabled when ipv4.address is set; IPv6 forwarding and proxy_ndp are enabled
+// when ipv6.address is set. Returns an error naming the sysctl that could not be set.
+func (d *nicIPVLAN) setupParentSysctls(parentName string) error {
+ if d.config["ipv4.address"] != "" {
+ // Set necessary sysctls for use with l2proxy parent in IPVLAN l3s mode.
+ ipv4FwdPath := fmt.Sprintf("ipv4/conf/%s/forwarding", parentName)
+ err := NetworkSysctlSet(ipv4FwdPath, "1")
+ if err != nil {
+ return fmt.Errorf("Error setting net sysctl %s: %v", ipv4FwdPath, err)
+ }
+ }
+
+ if d.config["ipv6.address"] != "" {
+ // Set necessary sysctls for use with l2proxy parent in IPVLAN l3s mode.
+ ipv6FwdPath := fmt.Sprintf("ipv6/conf/%s/forwarding", parentName)
+ err := NetworkSysctlSet(ipv6FwdPath, "1")
+ if err != nil {
+ return fmt.Errorf("Error setting net sysctl %s: %v", ipv6FwdPath, err)
+ }
+
+ ipv6ProxyNdpPath := fmt.Sprintf("ipv6/conf/%s/proxy_ndp", parentName)
+ err = NetworkSysctlSet(ipv6ProxyNdpPath, "1")
+ if err != nil {
+ return fmt.Errorf("Error setting net sysctl %s: %v", ipv6ProxyNdpPath, err)
+ }
+ }
+
+ return nil
+}
+
+// Stop is run when the device is removed from the container.
+// It removes the VLAN parent interface if volatile records that Start created it, and clears
+// that record when it returns.
+func (d *nicIPVLAN) Stop() error {
+ // Clear the creation marker once Stop returns; the result of volatileSet is not checked.
+ defer d.volatileSet(map[string]string{
+ "last_state.created": "",
+ })
+
+ v := d.volatileGet()
+
+ // This will delete the parent interface if we created it for VLAN parent.
+ if shared.IsTrue(v["last_state.created"]) {
+ parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+ err := NetworkRemoveInterface(parentName)
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
diff --git a/test/suites/container_devices_nic_ipvlan.sh b/test/suites/container_devices_nic_ipvlan.sh
index 737bd92005..811d2122f4 100644
--- a/test/suites/container_devices_nic_ipvlan.sh
+++ b/test/suites/container_devices_nic_ipvlan.sh
@@ -13,6 +13,9 @@ test_container_devices_nic_ipvlan() {
# Test ipvlan support to offline container (hot plugging not supported).
ip link add "${ct_name}" type dummy
+ # Record how many nics we started with.
+ startNicCount=$(find /sys/class/net | wc -l)
+
# Check that starting IPVLAN container.
sysctl net.ipv6.conf."${ct_name}".proxy_ndp=1
sysctl net.ipv6.conf."${ct_name}".forwarding=1
@@ -48,6 +51,33 @@ test_container_devices_nic_ipvlan() {
lxc exec "${ct_name}" -- ping6 -c2 -W1 "2001:db8::3${ipRand}"
lxc exec "${ct_name}2" -- ping -c2 -W1 "192.0.2.1${ipRand}"
lxc exec "${ct_name}2" -- ping6 -c2 -W1 "2001:db8::1${ipRand}"
+ lxc stop -f "${ct_name}2"
+
+ # Check IPVLAN on top of VLAN parent.
+ lxc stop -f "${ct_name}"
+ lxc config device set "${ct_name}" eth0 vlan 1234
+ lxc start "${ct_name}"
+
+ # Check VLAN interface created
+ if ! grep "1" "/sys/class/net/${ct_name}.1234/carrier" ; then
+ echo "vlan interface not created"
+ false
+ fi
+
+ lxc stop -f "${ct_name}"
+
+ # Check parent device is still up.
+ if ! grep "1" "/sys/class/net/${ct_name}/carrier" ; then
+ echo "parent is down"
+ false
+ fi
+
+ # Check we haven't left any NICS lying around.
+ endNicCount=$(find /sys/class/net | wc -l)
+ if [ "$startNicCount" != "$endNicCount" ]; then
+ echo "leftover NICS detected"
+ false
+ fi
# Cleanup ipvlan checks
lxc delete "${ct_name}" -f
From 7f64edbb823e9c8da6fbeedcc14a163e3932b295 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:10:59 +0100
Subject: [PATCH 16/32] device/nic/macvlan: Adds MACVLAN NIC device
implementation
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_macvlan.go | 134 +++++++++++++++++++
test/suites/container_devices_nic_macvlan.sh | 27 +++-
2 files changed, 159 insertions(+), 2 deletions(-)
create mode 100644 lxd/device/nic_macvlan.go
diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go
new file mode 100644
index 0000000000..b6c9df4a9c
--- /dev/null
+++ b/lxd/device/nic_macvlan.go
@@ -0,0 +1,134 @@
+package device
+
+import (
+ "fmt"
+
+ "github.com/lxc/lxd/lxd/instance"
+ "github.com/lxc/lxd/shared"
+)
+
+// nicMACVLAN is the NIC device implementation for the "macvlan" nictype.
+type nicMACVLAN struct {
+ deviceCommon
+}
+
+// validate checks the supplied config for correctness: the instance must be a container, the
+// parent property must be set, and only option keys supported by this NIC type may be present.
+func (d *nicMACVLAN) validate() error {
+ if d.instance.Type() != instance.TypeContainer {
+ return ErrUnsupportedDevType
+ }
+
+ if d.config["parent"] == "" {
+ return fmt.Errorf("Requires parent property")
+ }
+
+ validKeys := []string{"type", "nictype", "parent", "name", "mtu", "hwaddr", "vlan", "maas.subnet.ipv4", "maas.subnet.ipv6"}
+ for key := range d.config {
+ if shared.StringInSlice(key, validKeys) {
+ continue
+ }
+
+ return fmt.Errorf("Invalid device option: %s", key)
+ }
+
+ return nil
+}
+
+// Start is run when the device is added to the container.
+// It creates the VLAN parent if needed, creates a MACVLAN interface in bridge mode on top of the
+// parent, applies the optional hwaddr and mtu, saves the host interface name and creation marker
+// to volatile, and returns the run config that hands the interface to the container as a "phys"
+// NIC. If creating or configuring the MACVLAN interface fails, the interfaces created by this
+// call are removed again before the error is returned.
+func (d *nicMACVLAN) Start() (*RunConfig, error) {
+ if d.config["name"] == "" {
+ return nil, fmt.Errorf("Requires name property to start")
+ }
+
+ saveData := make(map[string]string)
+
+ // Decide which parent we should use based on VLAN setting.
+ parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+
+ // Record the temporary device name used for deletion later.
+ saveData["host_name"] = NetworkRandomDevName("mac")
+
+ // Create VLAN parent device if needed.
+ createdDev, err := NetworkCreateVlanDeviceIfNeeded(d.config["parent"], parentName, d.config["vlan"])
+ if err != nil {
+ return nil, err
+ }
+
+ // Record whether we created the parent device or not so it can be removed on stop.
+ saveData["last_state.created"] = fmt.Sprintf("%t", createdDev)
+
+ // cleanup removes what this call created. Nothing has been saved to volatile at the points
+ // where it is used, so Stop could not remove these interfaces later. Removal is best effort:
+ // its errors are dropped so that the original failure is the one reported.
+ cleanup := func(removeMACVLAN bool) {
+ if removeMACVLAN {
+ NetworkRemoveInterface(saveData["host_name"])
+ }
+ if createdDev {
+ NetworkRemoveInterface(parentName)
+ }
+ }
+
+ // Create MACVLAN interface.
+ _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", parentName, "type", "macvlan", "mode", "bridge")
+ if err != nil {
+ cleanup(false)
+ return nil, err
+ }
+
+ // Set the MAC address.
+ if d.config["hwaddr"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"])
+ if err != nil {
+ cleanup(true)
+ return nil, fmt.Errorf("Failed to set the MAC address: %s", err)
+ }
+ }
+
+ // Set the MTU.
+ if d.config["mtu"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"])
+ if err != nil {
+ cleanup(true)
+ return nil, fmt.Errorf("Failed to set the MTU: %s", err)
+ }
+ }
+
+ err = d.volatileSet(saveData)
+ if err != nil {
+ return nil, err
+ }
+
+ runConf := RunConfig{}
+ runConf.NetworkInterfaces = [][]RunConfigItem{{
+ {Key: "name", Value: d.config["name"]},
+ {Key: "type", Value: "phys"},
+ {Key: "flags", Value: "up"},
+ {Key: "link", Value: saveData["host_name"]},
+ }}
+
+ return &runConf, nil
+}
+
+// Stop is run when the device is removed from the container.
+// It removes the host-side MACVLAN interface recorded in volatile (if it still exists) and the
+// VLAN parent if volatile records that Start created it. Both removals are attempted even if
+// the first fails; any failures are returned together in a single error.
+func (d *nicMACVLAN) Stop() error {
+ // Clear this device's volatile keys once Stop returns; the result of volatileSet is not checked.
+ defer d.volatileSet(map[string]string{
+ "host_name": "",
+ "last_state.hwaddr": "",
+ "last_state.mtu": "",
+ "last_state.created": "",
+ })
+
+ errs := []error{}
+ v := d.volatileGet()
+
+ // Delete the detached device.
+ if v["host_name"] != "" && shared.PathExists(fmt.Sprintf("/sys/class/net/%s", v["host_name"])) {
+ err := NetworkRemoveInterface(v["host_name"])
+ if err != nil {
+ errs = append(errs, err)
+ }
+ }
+
+ // This will delete the parent interface if we created it for VLAN parent.
+ if shared.IsTrue(v["last_state.created"]) {
+ parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+ err := NetworkRemoveInterface(parentName)
+ if err != nil {
+ errs = append(errs, err)
+ }
+ }
+
+ if len(errs) > 0 {
+ return fmt.Errorf("%v", errs)
+ }
+
+ return nil
+}
diff --git a/test/suites/container_devices_nic_macvlan.sh b/test/suites/container_devices_nic_macvlan.sh
index e436eeb4c0..ca6bb1fdc1 100644
--- a/test/suites/container_devices_nic_macvlan.sh
+++ b/test/suites/container_devices_nic_macvlan.sh
@@ -9,6 +9,9 @@ test_container_devices_nic_macvlan() {
ip link add "${ct_name}" type dummy
ip link set "${ct_name}" up
+ # Record how many nics we started with.
+ startNicCount=$(find /sys/class/net | wc -l)
+
# Test pre-launch profile config is applied at launch.
lxc profile copy default "${ct_name}"
lxc profile device set "${ct_name}" eth0 parent "${ct_name}"
@@ -54,7 +57,6 @@ test_container_devices_nic_macvlan() {
lxc config device remove "${ct_name}" eth0
# Test hot plugging macvlan device based on vlan parent.
- ip link set "${ct_name}" up
lxc config device add "${ct_name}" eth0 nic \
nictype=macvlan \
parent="${ct_name}" \
@@ -68,8 +70,29 @@ test_container_devices_nic_macvlan() {
false
fi
- # Cleanup.
+ # Check VLAN interface created
+ if ! grep "1" "/sys/class/net/${ct_name}.10/carrier" ; then
+ echo "vlan interface not created"
+ false
+ fi
+
+ # Remove device from container, this should also remove created VLAN parent device.
lxc config device remove "${ct_name}" eth0
+
+ # Check parent device is still up.
+ if ! grep "1" "/sys/class/net/${ct_name}/carrier" ; then
+ echo "parent is down"
+ false
+ fi
+
+ # Check we haven't left any NICS lying around.
+ endNicCount=$(find /sys/class/net | wc -l)
+ if [ "$startNicCount" != "$endNicCount" ]; then
+ echo "leftover NICS detected"
+ false
+ fi
+
+ # Cleanup.
lxc delete "${ct_name}" -f
lxc delete "${ct_name}2" -f
ip link delete "${ct_name}" type dummy
From 86b90d097a90773cacaeabce8fc542ad30442ebf Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:11:30 +0100
Subject: [PATCH 17/32] device/nic/p2p: Adds P2P NIC device implementation
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_p2p.go | 138 +++++++++++++++++++++++
test/suites/container_devices_nic_p2p.sh | 12 ++
2 files changed, 150 insertions(+)
create mode 100644 lxd/device/nic_p2p.go
diff --git a/lxd/device/nic_p2p.go b/lxd/device/nic_p2p.go
new file mode 100644
index 0000000000..aaaf91c83e
--- /dev/null
+++ b/lxd/device/nic_p2p.go
@@ -0,0 +1,138 @@
+package device
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/lxc/lxd/lxd/device/config"
+ "github.com/lxc/lxd/lxd/instance"
+ "github.com/lxc/lxd/shared"
+)
+
+// nicP2P is the NIC device implementation for the "p2p" nictype.
+type nicP2P struct {
+ deviceCommon
+}
+
+// validate checks the supplied config for correctness: the instance must be a container, each
+// entry of the comma separated ipv4.routes and ipv6.routes lists must pass network validation,
+// and only option keys supported by this NIC type may be present.
+func (d *nicP2P) validate() error {
+ if d.instance.Type() != instance.TypeContainer {
+ return ErrUnsupportedDevType
+ }
+
+ if v4Routes := d.config["ipv4.routes"]; v4Routes != "" {
+ for _, entry := range strings.Split(v4Routes, ",") {
+ if err := NetworkValidNetworkV4(strings.TrimSpace(entry)); err != nil {
+ return err
+ }
+ }
+ }
+
+ if v6Routes := d.config["ipv6.routes"]; v6Routes != "" {
+ for _, entry := range strings.Split(v6Routes, ",") {
+ if err := NetworkValidNetworkV6(strings.TrimSpace(entry)); err != nil {
+ return err
+ }
+ }
+ }
+
+ validKeys := []string{"type", "nictype", "parent", "name", "mtu", "hwaddr", "host_name", "limits.ingress", "limits.egress", "limits.max", "ipv4.routes", "ipv6.routes"}
+ for key := range d.config {
+ if shared.StringInSlice(key, validKeys) {
+ continue
+ }
+
+ return fmt.Errorf("Invalid device option: %s", key)
+ }
+
+ return nil
+}
+
+// CanHotPlug reports that a p2p NIC can be managed whilst the instance is running, and lists
+// the fields that can be updated without triggering a device remove & add.
+func (d *nicP2P) CanHotPlug() (bool, []string) {
+ updatableFields := []string{"limits.ingress", "limits.egress", "limits.max", "ipv4.routes", "ipv6.routes"}
+ return true, updatableFields
+}
+
+// Start is run when the device is added to the container.
+// It creates a veth pair whose host end is named by the host_name option (or a random "veth"
+// name when unset), applies host-side settings, saves the host end's name to volatile and
+// returns the run config that hands the peer end to the container as a "phys" NIC.
+func (d *nicP2P) Start() (*RunConfig, error) {
+ if d.config["name"] == "" {
+ return nil, fmt.Errorf("Requires name property to start")
+ }
+
+ saveData := make(map[string]string)
+ saveData["host_name"] = d.config["host_name"]
+ if saveData["host_name"] == "" {
+ saveData["host_name"] = NetworkRandomDevName("veth")
+ }
+
+ var err error
+
+ // Create veth pair and configure the peer end with custom hwaddr and mtu if supplied.
+ peerName, err := networkCreateVethPair(saveData["host_name"], d.config)
+ if err != nil {
+ return nil, err
+ }
+
+ // Apply any host-side limits and routes.
+ err = networkSetupHostVethDevice(d.config, nil, saveData)
+ if err != nil {
+ // Remove the veth pair created above; the removal result is not checked.
+ NetworkRemoveInterface(saveData["host_name"])
+ return nil, err
+ }
+
+ err = d.volatileSet(saveData)
+ if err != nil {
+ return nil, err
+ }
+
+ runConf := RunConfig{}
+ runConf.NetworkInterfaces = [][]RunConfigItem{{
+ {Key: "name", Value: d.config["name"]},
+ {Key: "type", Value: "phys"},
+ {Key: "flags", Value: "up"},
+ {Key: "link", Value: peerName},
+ }}
+
+ return &runConf, nil
+}
+
+// Update applies configuration changes to a started device. When the instance is not running
+// there is nothing to do; otherwise the host-side veth settings are re-applied from the current
+// config, the previous config and the device's volatile data.
+func (d *nicP2P) Update(oldConfig config.Device, isRunning bool) error {
+ if isRunning {
+ // Apply any host-side limits and routes.
+ return networkSetupHostVethDevice(d.config, oldConfig, d.volatileGet())
+ }
+
+ return nil
+}
+
+// Stop is run when the device is removed from the container.
+// It removes the host-side veth interface if it still exists and clears the host_name volatile
+// key when it returns.
+func (d *nicP2P) Stop() error {
+ // Clear the recorded host interface once Stop returns; the result of volatileSet is not checked.
+ defer d.volatileSet(map[string]string{
+ "host_name": "",
+ })
+
+ v := d.volatileGet()
+
+ // Fall back to the name saved in volatile when none is configured. Note that this writes
+ // the value into d.config.
+ if d.config["host_name"] == "" {
+ d.config["host_name"] = v["host_name"]
+ }
+
+ if d.config["host_name"] != "" && shared.PathExists(fmt.Sprintf("/sys/class/net/%s", d.config["host_name"])) {
+ // Removing host-side end of veth pair will delete the peer end too.
+ err := NetworkRemoveInterface(d.config["host_name"])
+ if err != nil {
+ return fmt.Errorf("Failed to remove interface %s: %s", d.config["host_name"], err)
+ }
+ }
+
+ return nil
+}
diff --git a/test/suites/container_devices_nic_p2p.sh b/test/suites/container_devices_nic_p2p.sh
index 1c94e5a06c..a6db37b026 100644
--- a/test/suites/container_devices_nic_p2p.sh
+++ b/test/suites/container_devices_nic_p2p.sh
@@ -7,6 +7,9 @@ test_container_devices_nic_p2p() {
ctMAC="0A:92:a7:0d:b7:D9"
ipRand=$(shuf -i 0-9 -n 1)
+ # Record how many nics we started with.
+ startNicCount=$(find /sys/class/net | wc -l)
+
# Test pre-launch profile config is applied at launch.
lxc profile copy default ${ctName}
lxc profile device set ${ctName} eth0 ipv4.routes "192.0.2.1${ipRand}/32"
@@ -331,5 +334,14 @@ test_container_devices_nic_p2p() {
# Test hotplugging nic with new name (rather than updating existing nic).
lxc config device add "${ctName}" eth1 nic nictype=p2p
+ lxc stop -f "${ctName}"
+
+ # Check we haven't left any NICS lying around.
+ endNicCount=$(find /sys/class/net | wc -l)
+ if [ "$startNicCount" != "$endNicCount" ]; then
+ echo "leftover NICS detected"
+ false
+ fi
+
lxc delete "${ctName}" -f
}
From e31c8108b96298dd8739b5108e5b197ff68f66ee Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:12:08 +0100
Subject: [PATCH 18/32] device/nic/sriov: Adds SR-IOV NIC device implementation
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_sriov.go | 565 +++++++++++++++++++++
test/suites/container_devices_nic_sriov.sh | 12 +-
2 files changed, 576 insertions(+), 1 deletion(-)
create mode 100644 lxd/device/nic_sriov.go
diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go
new file mode 100644
index 0000000000..847cbe3f81
--- /dev/null
+++ b/lxd/device/nic_sriov.go
@@ -0,0 +1,565 @@
+package device
+
+import (
+ "bufio"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/lxc/lxd/lxd/instance"
+ "github.com/lxc/lxd/shared"
+)
+
+// nicSRIOV is the NIC device implementation for the "sriov" nictype.
+type nicSRIOV struct {
+ deviceCommon
+}
+
+// validate checks the supplied config for correctness: the instance must be a container, the
+// parent property must be set, and only option keys supported by this NIC type may be present.
+func (d *nicSRIOV) validate() error {
+ if d.instance.Type() != instance.TypeContainer {
+ return ErrUnsupportedDevType
+ }
+
+ if d.config["parent"] == "" {
+ return fmt.Errorf("Requires parent property")
+ }
+
+ validKeys := []string{"type", "nictype", "parent", "name", "mtu", "hwaddr", "security.mac_filtering", "vlan", "maas.subnet.ipv4", "maas.subnet.ipv6"}
+ for key := range d.config {
+ if shared.StringInSlice(key, validKeys) {
+ continue
+ }
+
+ return fmt.Errorf("Invalid device option: %s", key)
+ }
+
+ return nil
+}
+
+// Start is run when the device is added to the container.
+// It collects the interface names already claimed by the instances returned from
+// InstanceLoadNodeAll, picks a free virtual function on the parent, configures it through
+// setupSriovParent, applies the optional hwaddr and mtu, brings the interface up, saves the
+// recorded state to volatile and returns the run config that hands the VF interface to the
+// container as a "phys" NIC.
+func (d *nicSRIOV) Start() (*RunConfig, error) {
+ if d.config["name"] == "" {
+ return nil, fmt.Errorf("Requires name property to start")
+ }
+
+ saveData := make(map[string]string)
+
+ instances, err := InstanceLoadNodeAll(d.state)
+ if err != nil {
+ return nil, err
+ }
+
+ // Build a unique set of reserved network devices we cannot use.
+ reservedDevices := map[string]struct{}{}
+ for _, instance := range instances {
+ devices := instance.ExpandedDevices()
+ config := instance.ExpandedConfig()
+ for devName, devConfig := range devices {
+ // Record all parent devices, as these are not eligible for use as VFs.
+ parent := devConfig["parent"]
+ reservedDevices[parent] = struct{}{}
+
+ // If the device has the same parent as us, and a non-empty host_name, then
+ // mark that host_name as reserved, as that device is using it.
+ if devConfig["type"] == "nic" && parent == d.config["parent"] {
+ hostName := config[fmt.Sprintf("volatile.%s.host_name", devName)]
+ if hostName != "" {
+ reservedDevices[hostName] = struct{}{}
+ }
+ }
+ }
+ }
+
+ vfDev, vfID, err := d.findFreeVirtualFunction(reservedDevices)
+ if err != nil {
+ return nil, err
+ }
+
+ // Configure the VF on the parent; this also fills saveData, including "host_name".
+ err = d.setupSriovParent(vfDev, vfID, saveData)
+ if err != nil {
+ return nil, err
+ }
+
+ // Set the MAC address.
+ if d.config["hwaddr"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"])
+ if err != nil {
+ return nil, fmt.Errorf("Failed to set the MAC address: %s", err)
+ }
+ }
+
+ // Set the MTU.
+ if d.config["mtu"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"])
+ if err != nil {
+ return nil, fmt.Errorf("Failed to set the MTU: %s", err)
+ }
+ }
+
+ // Bring the interface up.
+ _, err = shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "up")
+ if err != nil {
+ return nil, fmt.Errorf("Failed to bring up the interface: %v", err)
+ }
+
+ err = d.volatileSet(saveData)
+ if err != nil {
+ return nil, err
+ }
+
+ runConf := RunConfig{}
+ runConf.NetworkInterfaces = [][]RunConfigItem{{
+ {Key: "name", Value: d.config["name"]},
+ {Key: "type", Value: "phys"},
+ {Key: "flags", Value: "up"},
+ {Key: "link", Value: saveData["host_name"]},
+ }}
+
+ return &runConf, nil
+}
+
+// Stop is run when the device is removed from the container. It restores the SR-IOV parent
+// from the device's volatile data and, on return, clears the volatile keys listed below.
+func (d *nicSRIOV) Stop() error {
+ // Registered first so the keys are cleared whatever the restore outcome is.
+ defer d.volatileSet(map[string]string{
+ "host_name": "",
+ "last_state.hwaddr": "",
+ "last_state.mtu": "",
+ "last_state.created": "",
+ "last_state.vf.id": "",
+ "last_state.vf.hwaddr": "",
+ "last_state.vf.vlan": "",
+ "last_state.vf.spoofcheck": "",
+ })
+
+ return d.restoreSriovParent(d.volatileGet())
+}
+
+// findFreeVirtualFunction looks on the specified parent device for an unused virtual function.
+// It first probes the VFs that are already enabled (indexes 0 to sriov_numvfs-1). If none is
+// free and the parent is below its sriov_totalvfs limit, it raises sriov_numvfs to that limit
+// and probes the newly enabled VFs. reservedDevices is the set of interface names that must not
+// be picked.
+// Returns the name of the interface and virtual function index ID if found, error if not.
+func (d *nicSRIOV) findFreeVirtualFunction(reservedDevices map[string]struct{}) (string, int, error) {
+ if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", d.config["parent"])) {
+ return "", 0, fmt.Errorf("Parent device '%s' doesn't exist", d.config["parent"])
+ }
+
+ sriovNumVFs := fmt.Sprintf("/sys/class/net/%s/device/sriov_numvfs", d.config["parent"])
+ sriovTotalVFs := fmt.Sprintf("/sys/class/net/%s/device/sriov_totalvfs", d.config["parent"])
+
+ // Verify that this is indeed a SR-IOV enabled device.
+ if !shared.PathExists(sriovTotalVFs) {
+ return "", 0, fmt.Errorf("Parent device '%s' doesn't support SR-IOV", d.config["parent"])
+ }
+
+ // Get parent dev_port and dev_id values.
+ pfDevPort, err := ioutil.ReadFile(fmt.Sprintf("/sys/class/net/%s/dev_port", d.config["parent"]))
+ if err != nil {
+ return "", 0, err
+ }
+
+ pfDevID, err := ioutil.ReadFile(fmt.Sprintf("/sys/class/net/%s/dev_id", d.config["parent"]))
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Get number of currently enabled VFs.
+ sriovNumVfsBuf, err := ioutil.ReadFile(sriovNumVFs)
+ if err != nil {
+ return "", 0, err
+ }
+ sriovNumVfsStr := strings.TrimSpace(string(sriovNumVfsBuf))
+ sriovNum, err := strconv.Atoi(sriovNumVfsStr)
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Get number of possible VFs.
+ sriovTotalVfsBuf, err := ioutil.ReadFile(sriovTotalVFs)
+ if err != nil {
+ return "", 0, err
+ }
+ sriovTotalVfsStr := strings.TrimSpace(string(sriovTotalVfsBuf))
+ sriovTotal, err := strconv.Atoi(sriovTotalVfsStr)
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Ensure parent is up (needed for Intel at least).
+ _, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "up")
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Check if any VFs are already enabled.
+ nicName := ""
+ vfID := 0
+ for i := 0; i < sriovNum; i++ {
+ vfListPath := fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", d.config["parent"], i)
+ if !shared.PathExists(vfListPath) {
+ continue
+ }
+
+ // Check if VF is already in use.
+ empty, err := shared.PathIsEmpty(vfListPath)
+ if err != nil {
+ return "", 0, err
+ }
+ if empty {
+ continue
+ }
+
+ nicName, err = NetworkSRIOVGetFreeVFInterface(reservedDevices, vfListPath, pfDevID, pfDevPort)
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Found a free VF.
+ if nicName != "" {
+ vfID = i
+ break
+ }
+ }
+
+ if nicName == "" {
+ if sriovNum == sriovTotal {
+ return "", 0, fmt.Errorf("All virtual functions of sriov device '%s' seem to be in use", d.config["parent"])
+ }
+
+ // Bump the number of VFs to the maximum.
+ err := ioutil.WriteFile(sriovNumVFs, []byte(sriovTotalVfsStr), 0644)
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Use next free VF index. The loop above covered indexes below sriovNum, so the newly
+ // enabled VFs are sriovNum up to sriovTotal-1; start at sriovNum so the first new VF is
+ // not skipped.
+ for i := sriovNum; i < sriovTotal; i++ {
+ vfListPath := fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", d.config["parent"], i)
+ nicName, err = NetworkSRIOVGetFreeVFInterface(reservedDevices, vfListPath, pfDevID, pfDevPort)
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Found a free VF.
+ if nicName != "" {
+ vfID = i
+ break
+ }
+ }
+ }
+
+ if nicName == "" {
+ return "", 0, fmt.Errorf("All virtual functions on parent device are already in use")
+ }
+
+ return nicName, vfID, nil
+}
+
+// setupSriovParent configures a SR-IOV virtual function (VF) device on parent and stores original
+// properties of the physical device into volatile for restoration on detach.
+// vfDevice is the VF's host interface name and vfID its index on the parent. The VF is unbound
+// from its driver, its VLAN, MAC and spoof check settings are applied through the parent, and it
+// is then rebound; the function waits for the interface to re-appear before returning.
+func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[string]string) error {
+ // Retrieve VF settings from parent device.
+ vfInfo, err := d.networkGetVirtFuncInfo(d.config["parent"], vfID)
+ if err != nil {
+ return err
+ }
+
+ // Record properties of VF settings on the parent device.
+ volatile["last_state.vf.hwaddr"] = vfInfo.mac
+ volatile["last_state.vf.id"] = fmt.Sprintf("%d", vfID)
+ volatile["last_state.vf.vlan"] = fmt.Sprintf("%d", vfInfo.vlan)
+ volatile["last_state.vf.spoofcheck"] = fmt.Sprintf("%t", vfInfo.spoofcheck)
+
+ // Record the host interface that represents the VF device which we will move into container.
+ volatile["host_name"] = vfDevice
+ volatile["last_state.created"] = "false" // Indicates don't delete device at stop time.
+
+ // Record properties of VF device.
+ err = networkSnapshotPhysicalNic(volatile["host_name"], volatile)
+ if err != nil {
+ return err
+ }
+
+ // Get VF device's PCI Slot Name so we can unbind and rebind it from the host.
+ vfPCISlot, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
+ if err != nil {
+ return err
+ }
+
+ // Get the path to the VF device's driver now, as once it is unbound we won't be able to
+ // determine its driver path in order to rebind it.
+ vfDriverPath, err := d.networkGetVFDeviceDriverPath(volatile["last_state.vf.id"])
+ if err != nil {
+ return err
+ }
+
+ // Unbind VF device from the host so that the settings will take effect when we rebind it.
+ err = d.networkDeviceUnbind(vfPCISlot, vfDriverPath)
+ if err != nil {
+ return err
+ }
+
+ // However we return from this function, we must try to rebind the VF so its not orphaned.
+ // The OS won't let an already bound device be bound again so is safe to call twice.
+ defer d.networkDeviceBind(vfPCISlot, vfDriverPath)
+
+ // Setup VF VLAN if specified.
+ if d.config["vlan"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "vlan", d.config["vlan"])
+ if err != nil {
+ return err
+ }
+ }
+
+ // Setup VF MAC spoofing protection if specified.
+ // The ordering of this section is very important, as Intel cards require a very specific
+ // order of setup to allow LXD to set custom MACs when using spoof check mode.
+ if shared.IsTrue(d.config["security.mac_filtering"]) {
+ // If no MAC specified in config, use current VF interface MAC.
+ mac := d.config["hwaddr"]
+ if mac == "" {
+ mac = volatile["last_state.vf.hwaddr"]
+ }
+
+ // Set MAC on VF (this combined with spoof checking prevents any other MAC being used).
+ _, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", mac)
+ if err != nil {
+ return err
+ }
+
+ // Now that MAC is set on VF, we can enable spoof checking.
+ _, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "spoofchk", "on")
+ if err != nil {
+ return err
+ }
+ } else {
+ // Reset VF to ensure no previous MAC restriction exists.
+ _, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", "00:00:00:00:00:00")
+ if err != nil {
+ return err
+ }
+
+ // Ensure spoof checking is disabled if not enabled in container.
+ _, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "spoofchk", "off")
+ if err != nil {
+ return err
+ }
+ }
+
+ // Bind VF device onto the host so that the settings will take effect.
+ err = d.networkDeviceBind(vfPCISlot, vfDriverPath)
+ if err != nil {
+ return err
+ }
+
+ // Wait for VF driver to be reloaded, this will remove the VF interface temporarily, and
+ // it will re-appear shortly after. Unfortunately the time between sending the bind event
+ // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait,
+ // otherwise next steps of applying settings to interface will fail.
+ err = d.networkDeviceBindWait(volatile["host_name"])
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// virtFuncInfo holds information about SR-IOV virtual functions.
+// It is filled in by networkGetVirtFuncInfo from the output of "ip link show".
+type virtFuncInfo struct {
+ // mac is the MAC address reported for the VF.
+ mac string
+ // vlan is the VLAN ID reported for the VF; 0 when the output has no vlan property.
+ vlan int
+ // spoofcheck records whether the "spoof checking" property was reported as enabled.
+ spoofcheck bool
+}
+
+// networkGetVirtFuncInfo returns info about an SR-IOV virtual function from the ip tool.
+// It runs "ip link show <devName>" and scans the output for the line describing VF vfID.
+// An error is returned if the command cannot be started, its output cannot be read or
+// parsed, or no line for the requested VF is found.
+func (d *nicSRIOV) networkGetVirtFuncInfo(devName string, vfID int) (vf virtFuncInfo, err error) {
+	cmd := exec.Command("ip", "link", "show", devName)
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return vf, err
+	}
+
+	err = cmd.Start()
+	if err != nil {
+		return vf, err
+	}
+
+	// However we return, close our end of the pipe and then reap the child process so that
+	// it isn't left behind as a zombie. The pipe is closed first so that a child still
+	// writing output cannot block the Wait. The exit status is deliberately ignored, the
+	// result of this function is determined solely by what was parsed from the output.
+	defer func() {
+		stdout.Close()
+		cmd.Wait()
+	}()
+
+	// Try and match: "vf 1 MAC 00:00:00:00:00:00, vlan 4095, spoof checking off"
+	reVlan := regexp.MustCompile(fmt.Sprintf(`vf %d MAC ((?:[[:xdigit:]]{2}:){5}[[:xdigit:]]{2}).*, vlan (\d+), spoof checking (\w+)`, vfID))
+
+	// IP link command doesn't show the vlan property if its set to 0, so we need to detect that.
+	// Try and match: "vf 1 MAC 00:00:00:00:00:00, spoof checking off"
+	reNoVlan := regexp.MustCompile(fmt.Sprintf(`vf %d MAC ((?:[[:xdigit:]]{2}:){5}[[:xdigit:]]{2}).*, spoof checking (\w+)`, vfID))
+
+	scanner := bufio.NewScanner(stdout)
+	for scanner.Scan() {
+		line := scanner.Text()
+
+		// First try and find VF and read its properties with VLAN activated.
+		res := reVlan.FindStringSubmatch(line)
+		if len(res) == 4 {
+			vlan, err := strconv.Atoi(res[2])
+			if err != nil {
+				return vf, err
+			}
+
+			vf.mac = res[1]
+			vf.vlan = vlan
+			vf.spoofcheck = shared.IsTrue(res[3])
+			return vf, nil
+		}
+
+		// Next try and find VF and read its properties with VLAN missing.
+		res = reNoVlan.FindStringSubmatch(line)
+		if len(res) == 3 {
+			vf.mac = res[1]
+			vf.vlan = 0 // Missing VLAN ID means 0 when resetting later.
+			vf.spoofcheck = shared.IsTrue(res[2])
+			return vf, nil
+		}
+	}
+
+	err = scanner.Err()
+	if err != nil {
+		return vf, err
+	}
+
+	return vf, fmt.Errorf("no matching virtual function found")
+}
+
+// networkGetVFDevicePCISlot returns the PCI slot name for a network virtual function device.
+// The value is taken from the PCI_SLOT_NAME entry of the VF's uevent file below the parent
+// device in /sys/class/net.
+func (d *nicSRIOV) networkGetVFDevicePCISlot(vfID string) (string, error) {
+	ueventPath := fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID)
+
+	uevent, err := os.Open(ueventPath)
+	if err != nil {
+		return "", err
+	}
+	defer uevent.Close()
+
+	// Looking for a line like "PCI_SLOT_NAME=0000:05:10.0", everything after the first "="
+	// is the slot name.
+	const slotKey = "PCI_SLOT_NAME="
+
+	lines := bufio.NewScanner(uevent)
+	for lines.Scan() {
+		entry := lines.Text()
+		if strings.HasPrefix(entry, slotKey) {
+			return entry[len(slotKey):], nil
+		}
+	}
+
+	err = lines.Err()
+	if err != nil {
+		return "", err
+	}
+
+	return "", fmt.Errorf("PCI_SLOT_NAME not found")
+}
+
+// networkGetVFDeviceDriverPath returns the path to the network virtual function device driver in /sys.
+// The path is obtained by resolving the VF's "driver" symlink below the parent device.
+func (d *nicSRIOV) networkGetVFDeviceDriverPath(vfID string) (string, error) {
+	driverLink := fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/driver", d.config["parent"], vfID)
+
+	return filepath.EvalSymlinks(driverLink)
+}
+
+// networkDeviceUnbind unbinds a network device from the OS by writing its PCI slot name to the
+// "unbind" file inside the supplied driver path.
+func (d *nicSRIOV) networkDeviceUnbind(pciSlotName string, driverPath string) error {
+	unbindFile := fmt.Sprintf("%s/unbind", driverPath)
+
+	return ioutil.WriteFile(unbindFile, []byte(pciSlotName), 0600)
+}
+
+// networkDeviceBind binds a network device to the OS by writing its PCI slot name to the
+// "bind" file inside the supplied driver path.
+func (d *nicSRIOV) networkDeviceBind(pciSlotName string, driverPath string) error {
+	bindFile := fmt.Sprintf("%s/bind", driverPath)
+
+	return ioutil.WriteFile(bindFile, []byte(pciSlotName), 0600)
+}
+
+// networkDeviceBindWait waits for network interface to appear after being bound.
+// It checks for the interface in /sys/class/net up to 10 times, sleeping 50ms after each
+// unsuccessful check, and returns an error if the interface never shows up.
+func (d *nicSRIOV) networkDeviceBindWait(devName string) error {
+	sysPath := fmt.Sprintf("/sys/class/net/%s", devName)
+
+	for remaining := 10; remaining > 0; remaining-- {
+		if shared.PathExists(sysPath) {
+			return nil
+		}
+
+		time.Sleep(50 * time.Millisecond)
+	}
+
+	return fmt.Errorf("Bind of interface \"%s\" took too long", devName)
+}
+
+// restoreSriovParent restores SR-IOV parent device settings when removed from a container using the
+// volatile data that was stored when the device was first added with setupSriovParent().
+// The VF is unbound from its driver, its recorded VLAN, spoof check mode and MAC are re-applied
+// via "ip link set", then it is rebound and the host interface settings are restored.
+// Returns nil without doing anything if host_name, last_state.vf.id or the parent are unknown.
+func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error {
+ // Nothing to do if we don't know the original device name or the VF ID.
+ if volatile["host_name"] == "" || volatile["last_state.vf.id"] == "" || d.config["parent"] == "" {
+ return nil
+ }
+
+ // Get VF device's PCI Slot Name so we can unbind and rebind it from the host.
+ vfPCISlot, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
+ if err != nil {
+ return err
+ }
+
+ // Get the path to the VF device's driver now, as once it is unbound we won't be able to
+ // determine its driver path in order to rebind it.
+ vfDriverPath, err := d.networkGetVFDeviceDriverPath(volatile["last_state.vf.id"])
+ if err != nil {
+ return err
+ }
+
+ // Unbind VF device from the host so that the settings will take effect when we rebind it.
+ err = d.networkDeviceUnbind(vfPCISlot, vfDriverPath)
+ if err != nil {
+ return err
+ }
+
+ // However we return from this function, we must try to rebind the VF so it's not orphaned.
+ // The OS won't let an already bound device be bound again so is safe to call twice.
+ // The error from this deferred bind is not inspected.
+ defer d.networkDeviceBind(vfPCISlot, vfDriverPath)
+
+ // Reset VF VLAN if recorded.
+ if volatile["last_state.vf.vlan"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "vlan", volatile["last_state.vf.vlan"])
+ if err != nil {
+ return err
+ }
+ }
+
+ // Reset VF MAC spoofing protection if recorded. Do this first before resetting the MAC
+ // to avoid any issues with zero MACs refusing to be set whilst spoof check is on.
+ if volatile["last_state.vf.spoofcheck"] != "" {
+ mode := "off"
+ if shared.IsTrue(volatile["last_state.vf.spoofcheck"]) {
+ mode = "on"
+ }
+
+ _, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "spoofchk", mode)
+ if err != nil {
+ return err
+ }
+ }
+
+ // Reset VF MAC if recorded.
+ if volatile["last_state.vf.hwaddr"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", volatile["last_state.vf.hwaddr"])
+ if err != nil {
+ return err
+ }
+ }
+
+ // Bind VF device onto the host so that the settings will take effect.
+ err = d.networkDeviceBind(vfPCISlot, vfDriverPath)
+ if err != nil {
+ return err
+ }
+
+ // Wait for VF driver to be reloaded, this will remove the VF interface from the container
+ // and it will re-appear on the host. Unfortunately the time between sending the bind event
+ // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait,
+ // otherwise next step of restoring settings in networkRestorePhysicalNic will fail.
+ err = d.networkDeviceBindWait(volatile["host_name"])
+ if err != nil {
+ return err
+ }
+
+ // Restore VF interface settings.
+ err = networkRestorePhysicalNic(volatile["host_name"], volatile)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
diff --git a/test/suites/container_devices_nic_sriov.sh b/test/suites/container_devices_nic_sriov.sh
index 56ee6f7b1a..8c3ccc9cc5 100644
--- a/test/suites/container_devices_nic_sriov.sh
+++ b/test/suites/container_devices_nic_sriov.sh
@@ -20,11 +20,14 @@ test_container_devices_nic_sriov() {
ctName="nt$$"
macRand=$(shuf -i 0-9 -n 1)
ctMAC1="da:da:9d:42:e5:f${macRand}"
- ctMAC2="da:da:9d:42:e5:f${macRand}"
+ ctMAC2="da:da:9d:42:e5:e${macRand}"
# Set a known start point config
ip link set "${parent}" up
+ # Record how many nics we started with.
+ startNicCount=$(find /sys/class/net | wc -l)
+
# Test basic container with SR-IOV NIC
lxc init testimage "${ctName}"
lxc config device add "${ctName}" eth0 nic \
@@ -122,4 +125,11 @@ test_container_devices_nic_sriov() {
fi
lxc delete -f "${ctName}"
+
+ # Check we haven't left any NICS lying around.
+ endNicCount=$(find /sys/class/net | wc -l)
+ if [ "$startNicCount" != "$endNicCount" ]; then
+ echo "leftover NICS detected"
+ false
+ fi
}
From 1df59f8ca8d4bb5e5c7828d8f142e840fae25ab2 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 11:43:46 +0100
Subject: [PATCH 19/32] device/nic/bridged: Adds bridged NIC device
implementation
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_bridged.go | 1339 +++++++++++++++++
test/suites/container_devices_nic_bridged.sh | 47 +-
...container_devices_nic_bridged_filtering.sh | 35 +-
3 files changed, 1410 insertions(+), 11 deletions(-)
create mode 100644 lxd/device/nic_bridged.go
diff --git a/lxd/device/nic_bridged.go b/lxd/device/nic_bridged.go
new file mode 100644
index 0000000000..768626027c
--- /dev/null
+++ b/lxd/device/nic_bridged.go
@@ -0,0 +1,1339 @@
+package device
+
+import (
+ "bufio"
+ "bytes"
+ "encoding/binary"
+ "encoding/hex"
+ "fmt"
+ "io/ioutil"
+ "math"
+ "math/big"
+ "math/rand"
+ "net"
+ "os"
+ "strconv"
+ "strings"
+
+ "github.com/google/gopacket"
+ "github.com/google/gopacket/layers"
+ "github.com/mdlayher/eui64"
+
+ "github.com/lxc/lxd/lxd/device/config"
+ "github.com/lxc/lxd/lxd/dnsmasq"
+ "github.com/lxc/lxd/lxd/instance"
+ "github.com/lxc/lxd/lxd/iptables"
+ "github.com/lxc/lxd/shared"
+)
+
+// dhcpAllocation represents an IP allocation from dnsmasq used for IP filtering.
+type dhcpAllocation struct {
+ // IP is the allocated address.
+ IP net.IP
+ // Name is the name of the instance the allocation belongs to.
+ Name string
+ // MAC is the hardware address associated with the allocation (left unset by getDHCPStaticIPs).
+ MAC net.HardwareAddr
+ // Static is set to true for allocations read from a dnsmasq static host config file.
+ Static bool
+}
+
+// dhcpRange represents a range of IPs from start to end.
+type dhcpRange struct {
+ // Start is the first IP of the range.
+ Start net.IP
+ // End is the last IP of the range.
+ End net.IP
+}
+
+// nicBridged is the device implementation for NICs attached to a bridge (the "parent")
+// using a veth pair. It embeds deviceCommon, which supplies the config, instance and state
+// accessed by its methods.
+type nicBridged struct {
+ deviceCommon
+}
+
+// validate checks the supplied config for correctness.
+// It rejects non-container instances, a missing parent, managed-network-only options used with
+// a parent that has no dnsmasq.pid file, malformed addresses or routes and unknown option keys.
+func (d *nicBridged) validate() error {
+	if d.instance.Type() != instance.TypeContainer {
+		return ErrUnsupportedDevType
+	}
+
+	parent := d.config["parent"]
+	if parent == "" {
+		return fmt.Errorf("Requires parent property")
+	}
+
+	// Options that are only permitted when the parent is a managed network, which is detected
+	// by the presence of its dnsmasq.pid file.
+	isManaged := shared.PathExists(shared.VarPath("networks", parent, "dnsmasq.pid"))
+	if !isManaged {
+		managedOnly := []string{"ipv4.address", "ipv6.address", "security.mac_filtering", "security.ipv4_filtering", "security.ipv6_filtering"}
+		for _, key := range managedOnly {
+			// Any non-empty value (which includes every "true" value) counts as in use.
+			if d.config[key] != "" {
+				return fmt.Errorf("%s can only be used with managed networks", key)
+			}
+		}
+	}
+
+	// Static addresses, if supplied, must be valid for their family.
+	if addr := d.config["ipv4.address"]; addr != "" {
+		if err := NetworkValidAddressV4(addr); err != nil {
+			return err
+		}
+	}
+
+	if addr := d.config["ipv6.address"]; addr != "" {
+		if err := NetworkValidAddressV6(addr); err != nil {
+			return err
+		}
+	}
+
+	// Routes are comma separated lists of networks, whitespace around each entry is ignored.
+	if routes := d.config["ipv4.routes"]; routes != "" {
+		for _, entry := range strings.Split(routes, ",") {
+			if err := NetworkValidNetworkV4(strings.TrimSpace(entry)); err != nil {
+				return err
+			}
+		}
+	}
+
+	if routes := d.config["ipv6.routes"]; routes != "" {
+		for _, entry := range strings.Split(routes, ",") {
+			if err := NetworkValidNetworkV6(strings.TrimSpace(entry)); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Finally refuse any option key this device type doesn't know about.
+	knownKeys := []string{"type", "nictype", "parent", "name", "mtu", "hwaddr", "host_name", "limits.ingress", "limits.egress", "limits.max", "ipv4.routes", "ipv6.routes", "ipv4.address", "ipv6.address", "security.mac_filtering", "security.ipv4_filtering", "security.ipv6_filtering"}
+	for key := range d.config {
+		if !shared.StringInSlice(key, knownKeys) {
+			return fmt.Errorf("Invalid device option: %s", key)
+		}
+	}
+
+	return nil
+}
+
+// CanHotPlug reports that the device can be managed whilst the instance is running, and lists
+// the fields that can be updated without triggering a device remove & add.
+func (d *nicBridged) CanHotPlug() (bool, []string) {
+	liveUpdatable := []string{"limits.ingress", "limits.egress", "limits.max", "ipv4.routes", "ipv6.routes", "ipv4.address", "ipv6.address", "security.mac_filtering", "security.ipv4_filtering", "security.ipv6_filtering"}
+
+	return true, liveUpdatable
+}
+
+// Add is run when a device is added to an instance whether or not the instance is running.
+// Its only job is to rebuild the dnsmasq entry (if needed) and reload dnsmasq.
+func (d *nicBridged) Add() error {
+	return d.rebuildDnsmasqEntry()
+}
+
+// Start is run when the device is added to the instance and instance is starting or running.
+// It creates the veth pair, applies host-side limits, routes and filters, attaches the host end
+// to the parent bridge and records the host interface name in volatile config. The returned
+// RunConfig instructs the instance to take the peer end of the pair as a "phys" interface.
+// If any step after the veth pair has been created fails, the host-side interface is removed
+// again so that it isn't leaked.
+func (d *nicBridged) Start() (*RunConfig, error) {
+	if d.config["name"] == "" {
+		return nil, fmt.Errorf("Requires name property to start")
+	}
+
+	saveData := make(map[string]string)
+	saveData["host_name"] = d.config["host_name"]
+	if saveData["host_name"] == "" {
+		saveData["host_name"] = NetworkRandomDevName("veth")
+	}
+
+	// Create veth pair and configure the peer end with custom hwaddr and mtu if supplied.
+	peerName, err := networkCreateVethPair(saveData["host_name"], d.config)
+	if err != nil {
+		return nil, err
+	}
+
+	// From this point on the veth pair exists, so however we fail it must be removed again.
+	// This also covers a failure to store the volatile config, which previously left the
+	// interface behind.
+	started := false
+	defer func() {
+		if !started {
+			NetworkRemoveInterface(saveData["host_name"])
+		}
+	}()
+
+	// Apply any host-side limits and routes.
+	err = networkSetupHostVethDevice(d.config, nil, saveData)
+	if err != nil {
+		return nil, err
+	}
+
+	// Apply any host-side network filters (uses enriched host_name from networkSetupHostVethDevice).
+	err = d.setupHostFilters(nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Attach host side veth interface to bridge.
+	err = NetworkAttachInterface(d.config["parent"], saveData["host_name"])
+	if err != nil {
+		return nil, err
+	}
+
+	// Attempt to disable router advertisement acceptance. A missing sysctl is not an error.
+	err = NetworkSysctlSet(fmt.Sprintf("ipv6/conf/%s/accept_ra", saveData["host_name"]), "0")
+	if err != nil && !os.IsNotExist(err) {
+		return nil, err
+	}
+
+	err = d.volatileSet(saveData)
+	if err != nil {
+		return nil, err
+	}
+
+	runConf := RunConfig{}
+	runConf.NetworkInterfaces = [][]RunConfigItem{{
+		{Key: "name", Value: d.config["name"]},
+		{Key: "type", Value: "phys"},
+		{Key: "flags", Value: "up"},
+		{Key: "link", Value: peerName},
+	}}
+
+	started = true
+	return &runConf, nil
+}
+
+// Update applies configuration changes to a started device.
+// oldConfig is the device config before the change and isRunning indicates whether the
+// instance is currently running. Host-side limits, routes and filters are only re-applied when
+// the instance is running; the dnsmasq entry is rebuilt in either case.
+func (d *nicBridged) Update(oldConfig config.Device, isRunning bool) error {
+ // If an IPv6 address has changed, flush all existing IPv6 leases for instance so instance
+ // isn't allocated old IP. This is important with IPv6 because DHCPv6 supports multiple IP
+ // address allocation and would result in instance having leases for both old and new IPs.
+ // This is skipped when no hwaddr is present in the device config.
+ if d.config["hwaddr"] != "" && d.config["ipv6.address"] != oldConfig["ipv6.address"] {
+ err := d.networkClearLease(d.instance.Name(), d.config["parent"], d.config["hwaddr"], clearLeaseIPv6Only)
+ if err != nil {
+ return err
+ }
+ }
+
+ v := d.volatileGet()
+
+ // If instance is running, apply host side limits and filters first before rebuilding
+ // dnsmasq config below so that existing config can be used as part of the filter removal.
+ if isRunning {
+ // Apply any host-side limits and routes.
+ err := networkSetupHostVethDevice(d.config, oldConfig, v)
+ if err != nil {
+ return err
+ }
+
+ // Apply any host-side network filters (uses enriched host_name from networkSetupHostVethDevice).
+ err = d.setupHostFilters(oldConfig)
+ if err != nil {
+ return err
+ }
+ }
+
+ // Rebuild dnsmasq entry if needed and reload.
+ err := d.rebuildDnsmasqEntry()
+ if err != nil {
+ return err
+ }
+
+ // If an IPv6 address has changed, if the instance is running we should bounce the host-side
+ // veth interface to give the instance a chance to detect the change and re-apply for an
+ // updated lease with new IP address. Whether the instance is running is inferred here from
+ // the volatile host_name being set and the interface existing in /sys/class/net.
+ if d.config["ipv6.address"] != oldConfig["ipv6.address"] && v["host_name"] != "" && shared.PathExists(fmt.Sprintf("/sys/class/net/%s", v["host_name"])) {
+ _, err := shared.RunCommand("ip", "link", "set", v["host_name"], "down")
+ if err != nil {
+ return err
+ }
+ _, err = shared.RunCommand("ip", "link", "set", v["host_name"], "up")
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// Stop is run when the device is removed from the instance.
+// It removes the host-side veth interface (if it still exists), then removes the device's
+// host routes and network filters. Note that it fills in missing host_name and hwaddr values
+// in d.config from the volatile config.
+func (d *nicBridged) Stop() error {
+ // However we return, clear the recorded host interface name from volatile config.
+ // The result of this deferred call is not inspected.
+ defer d.volatileSet(map[string]string{
+ "host_name": "",
+ })
+
+ v := d.volatileGet()
+
+ // Fall back to the volatile values so that the cleanup below knows which interface and
+ // MAC address were in use.
+ if d.config["host_name"] == "" {
+ d.config["host_name"] = v["host_name"]
+ }
+
+ if d.config["hwaddr"] == "" {
+ d.config["hwaddr"] = v["hwaddr"]
+ }
+
+ if d.config["host_name"] != "" && shared.PathExists(fmt.Sprintf("/sys/class/net/%s", d.config["host_name"])) {
+ // Removing host-side end of veth pair will delete the peer end too.
+ err := NetworkRemoveInterface(d.config["host_name"])
+ if err != nil {
+ return fmt.Errorf("Failed to remove interface %s: %s", d.config["host_name"], err)
+ }
+ }
+
+ // Route and filter removal is best effort: the error returned by removeFilters is discarded.
+ networkRemoveVethRoutes(d.config)
+ d.removeFilters(d.config)
+
+ return nil
+}
+
+// Remove is run when the instance is deleted.
+// It clears all DHCP leases held for the device and, when the parent is a managed network,
+// deletes the instance's static dnsmasq entry and reloads dnsmasq.
+func (d *nicBridged) Remove() error {
+	parent := d.config["parent"]
+
+	err := d.networkClearLease(d.instance.Name(), parent, d.config["hwaddr"], clearLeaseAll)
+	if err != nil {
+		return err
+	}
+
+	// Only a managed parent (one with a dnsmasq.pid file) has an old config file to remove.
+	if parent == "" || !shared.PathExists(shared.VarPath("networks", parent, "dnsmasq.pid")) {
+		return nil
+	}
+
+	dnsmasq.ConfigMutex.Lock()
+	defer dnsmasq.ConfigMutex.Unlock()
+
+	err = dnsmasq.RemoveStaticEntry(parent, d.instance.Project(), d.instance.Name())
+	if err != nil {
+		return err
+	}
+
+	// Reload dnsmasq to apply new settings.
+	err = dnsmasq.Kill(parent, true)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// rebuildDnsmasqEntry rewrites the instance's dnsmasq host entry and reloads dnsmasq.
+// It does nothing when the parent has no dnsmasq.pid file, i.e. isn't an LXD managed network.
+func (d *nicBridged) rebuildDnsmasqEntry() error {
+	parent := d.config["parent"]
+
+	// Only managed networks have a dnsmasq config to rebuild.
+	if !shared.PathExists(shared.VarPath("networks", parent, "dnsmasq.pid")) {
+		return nil
+	}
+
+	dnsmasq.ConfigMutex.Lock()
+	defer dnsmasq.ConfigMutex.Unlock()
+
+	_, dbInfo, err := d.state.Cluster.NetworkGet(parent)
+	if err != nil {
+		return err
+	}
+
+	netConfig := dbInfo.Config
+	staticIPv4 := d.config["ipv4.address"]
+	staticIPv6 := d.config["ipv6.address"]
+
+	// With IP filtering enabled and no static IP in the device config, an address may have
+	// been dynamically assigned and written to the dnsmasq config earlier. In that case carry
+	// it over into the new config rather than dropping it.
+	wantCurIPv4 := staticIPv4 == "" && shared.IsTrue(d.config["security.ipv4_filtering"])
+	wantCurIPv6 := staticIPv6 == "" && shared.IsTrue(d.config["security.ipv6_filtering"])
+	if wantCurIPv4 || wantCurIPv6 {
+		curIPv4, curIPv6, err := dnsmasq.DHCPStaticIPs(parent, d.instance.Name())
+		if err != nil && !os.IsNotExist(err) {
+			return err
+		}
+
+		if staticIPv4 == "" && curIPv4.IP != nil {
+			staticIPv4 = curIPv4.IP.String()
+		}
+
+		if staticIPv6 == "" && curIPv6.IP != nil {
+			staticIPv6 = curIPv6.IP.String()
+		}
+	}
+
+	err = dnsmasq.UpdateStaticEntry(parent, d.instance.Project(), d.instance.Name(), netConfig, d.config["hwaddr"], staticIPv4, staticIPv6)
+	if err != nil {
+		return err
+	}
+
+	// Reload dnsmasq to apply new settings.
+	err = dnsmasq.Kill(parent, true)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// setupHostFilters applies any host side network filters.
+// When oldConfig is non-nil (an update) and had any filtering enabled, the filters generated
+// from it are removed first. Filters are then set up if the current config enables any.
+func (d *nicBridged) setupHostFilters(oldConfig config.Device) error {
+	// anyFiltering reports whether at least one of the filtering options is enabled.
+	anyFiltering := func(c config.Device) bool {
+		return shared.IsTrue(c["security.mac_filtering"]) || shared.IsTrue(c["security.ipv4_filtering"]) || shared.IsTrue(c["security.ipv6_filtering"])
+	}
+
+	// Remove any old network filters, failure to do so doesn't stop the new ones being set.
+	if oldConfig != nil && anyFiltering(oldConfig) {
+		d.removeFilters(oldConfig)
+	}
+
+	if !anyFiltering(d.config) {
+		return nil
+	}
+
+	// Setup network filters.
+	err := d.setFilters()
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// removeFilters removes any network level filters defined for the instance.
+// m is the device config the filters were generated from; its hwaddr and host_name must be set.
+// The ip6tables rules for the instance are cleared, then every active ebtables rule that matches
+// one of the rules LXD would have generated for m is deleted. Errors from individual rule
+// deletions are collected and reported together.
+func (d *nicBridged) removeFilters(m config.Device) error {
+ if m["hwaddr"] == "" {
+ return fmt.Errorf("Failed to remove network filters for %s: hwaddr not defined", m["name"])
+ }
+
+ if m["host_name"] == "" {
+ return fmt.Errorf("Failed to remove network filters for %s: host_name not defined", m["name"])
+ }
+
+ // Remove any IPv6 filters used for this instance.
+ err := iptables.ContainerClear("ipv6", fmt.Sprintf("%s - ipv6_filtering", d.instance.Name()), "filter")
+ if err != nil {
+ return fmt.Errorf("Failed to clear ip6tables ipv6_filter rules for %s: %v", m["name"], err)
+ }
+
+ // Read current static IP allocation configured from dnsmasq host config (if exists).
+ // NOTE(review): unlike other callers of getDHCPStaticIPs, a missing hosts file isn't
+ // tolerated here and aborts the removal - confirm this is intended.
+ IPv4, IPv6, err := d.getDHCPStaticIPs(m["parent"], d.instance.Name())
+ if err != nil {
+ return fmt.Errorf("Failed to remove network filters for %s: %v", m["name"], err)
+ }
+
+ // Get a current list of rules active on the host.
+ out, err := shared.RunCommand("ebtables", "-L", "--Lmac2", "--Lx")
+ if err != nil {
+ return fmt.Errorf("Failed to remove network filters for %s: %v", m["name"], err)
+ }
+
+ // Get a list of rules that we would have applied on instance start.
+ rules := d.generateFilterEbtablesRules(m, IPv4.IP, IPv6.IP)
+
+ errs := []error{}
+ // Iterate through each active rule on the host and try and match it to one the LXD rules.
+ for _, line := range strings.Split(out, "\n") {
+ line = strings.TrimSpace(line)
+ fields := strings.Fields(line)
+ fieldsLen := len(fields)
+
+ for _, rule := range rules {
+ // Rule doesn't match if the field lengths aren't the same, move on.
+ if len(rule) != fieldsLen {
+ continue
+ }
+
+ // Check whether active rule matches one of our rules to delete.
+ // In delete mode this also rewrites the "-A" in fields to "-D".
+ if !d.matchEbtablesRule(fields, rule, true) {
+ continue
+ }
+
+ // If we get this far, then the current host rule matches one of our LXD
+ // rules, so we should run the modified command to delete it.
+ _, err = shared.RunCommand(fields[0], fields[1:]...)
+ if err != nil {
+ errs = append(errs, err)
+ }
+ }
+ }
+
+ if len(errs) > 0 {
+ return fmt.Errorf("Failed to remove network filters rule for %s: %v", m["name"], errs)
+ }
+
+ return nil
+}
+
+// getDHCPStaticIPs retrieves the dnsmasq statically allocated IPs for an instance.
+// Returns IPv4 and IPv6 dhcpAllocation structs respectively. The addresses are read from the
+// instance's file in the network's dnsmasq.hosts directory; the error from opening that file
+// is returned unmodified so callers can test it with os.IsNotExist.
+// NOTE(review): the file is looked up by plain instance name, while dnsmasq.UpdateStaticEntry
+// appears to write it using a project prefixed name - confirm these agree for non-default
+// projects.
+func (d *nicBridged) getDHCPStaticIPs(network string, instanceName string) (dhcpAllocation, dhcpAllocation, error) {
+	var IPv4, IPv6 dhcpAllocation
+
+	hostFile, err := os.Open(shared.VarPath("networks", network, "dnsmasq.hosts") + "/" + instanceName)
+	if err != nil {
+		return IPv4, IPv6, err
+	}
+	defer hostFile.Close()
+
+	lines := bufio.NewScanner(hostFile)
+	for lines.Scan() {
+		// Each line is a comma separated list of fields, pick out the ones that are addresses.
+		for _, field := range strings.Split(lines.Text(), ",") {
+			switch {
+			case strings.Count(field, ".") == 3:
+				// Three dots means the field should be an IPv4 address.
+				addr := net.ParseIP(field).To4()
+				if addr == nil {
+					return IPv4, IPv6, fmt.Errorf("Error parsing IP address: %v", field)
+				}
+
+				IPv4 = dhcpAllocation{Name: d.instance.Name(), Static: true, IP: addr}
+
+			case strings.HasPrefix(field, "[") && strings.HasSuffix(field, "]"):
+				// IPv6 addresses are wrapped in square brackets.
+				addr := net.ParseIP(field[1 : len(field)-1])
+				if addr == nil {
+					return IPv4, IPv6, fmt.Errorf("Error parsing IP address: %v", field)
+				}
+
+				IPv6 = dhcpAllocation{Name: d.instance.Name(), Static: true, IP: addr}
+			}
+		}
+	}
+
+	err = lines.Err()
+	if err != nil {
+		return IPv4, IPv6, err
+	}
+
+	return IPv4, IPv6, nil
+}
+
+// generateFilterEbtablesRules returns a customised set of ebtables filter rules based on the device.
+// Each rule is a complete "ebtables ... -A ..." command line split into arguments. The MAC
+// source filtering rules are always included; the IPv4 rules are added only when
+// security.ipv4_filtering is enabled in m and IPv4 is non-nil, likewise for IPv6.
+// The same rules are used by removeFilters to recognise the active rules to delete, so rule
+// contents must stay in step with what ebtables reports.
+func (d *nicBridged) generateFilterEbtablesRules(m config.Device, IPv4 net.IP, IPv6 net.IP) [][]string {
+ // MAC source filtering rules. Blocks any packet coming from instance with an incorrect Ethernet source MAC.
+ // This is required for IP filtering too.
+ rules := [][]string{
+ {"ebtables", "-t", "filter", "-A", "INPUT", "-s", "!", m["hwaddr"], "-i", m["host_name"], "-j", "DROP"},
+ {"ebtables", "-t", "filter", "-A", "FORWARD", "-s", "!", m["hwaddr"], "-i", m["host_name"], "-j", "DROP"},
+ }
+
+ if shared.IsTrue(m["security.ipv4_filtering"]) && IPv4 != nil {
+ rules = append(rules,
+ // Prevent ARP MAC spoofing (prevents the instance poisoning the ARP cache of its neighbours with a MAC address that isn't its own).
+ []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "ARP", "-i", m["host_name"], "--arp-mac-src", "!", m["hwaddr"], "-j", "DROP"},
+ []string{"ebtables", "-t", "filter", "-A", "FORWARD", "-p", "ARP", "-i", m["host_name"], "--arp-mac-src", "!", m["hwaddr"], "-j", "DROP"},
+ // Prevent ARP IP spoofing (prevents the instance redirecting traffic for IPs that are not its own).
+ []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "ARP", "-i", m["host_name"], "--arp-ip-src", "!", IPv4.String(), "-j", "DROP"},
+ []string{"ebtables", "-t", "filter", "-A", "FORWARD", "-p", "ARP", "-i", m["host_name"], "--arp-ip-src", "!", IPv4.String(), "-j", "DROP"},
+ // Allow DHCPv4 to the host only. This must come before the IP source filtering rules below.
+ []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv4", "-s", m["hwaddr"], "-i", m["host_name"], "--ip-src", "0.0.0.0", "--ip-dst", "255.255.255.255", "--ip-proto", "udp", "--ip-dport", "67", "-j", "ACCEPT"},
+ // IP source filtering rules. Blocks any packet coming from instance with an incorrect IP source address.
+ []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv4", "-i", m["host_name"], "--ip-src", "!", IPv4.String(), "-j", "DROP"},
+ []string{"ebtables", "-t", "filter", "-A", "FORWARD", "-p", "IPv4", "-i", m["host_name"], "--ip-src", "!", IPv4.String(), "-j", "DROP"},
+ )
+ }
+
+ if shared.IsTrue(m["security.ipv6_filtering"]) && IPv6 != nil {
+ rules = append(rules,
+ // Allow DHCPv6 and Router Solicitation to the host only. This must come before the IP source filtering rules below.
+ []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv6", "-s", m["hwaddr"], "-i", m["host_name"], "--ip6-src", "fe80::/ffc0::", "--ip6-dst", "ff02::1:2/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "--ip6-proto", "udp", "--ip6-dport", "547", "-j", "ACCEPT"},
+ []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv6", "-s", m["hwaddr"], "-i", m["host_name"], "--ip6-src", "fe80::/ffc0::", "--ip6-dst", "ff02::2/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "--ip6-proto", "ipv6-icmp", "--ip6-icmp-type", "router-solicitation", "-j", "ACCEPT"},
+ // IP source filtering rules. Blocks any packet coming from instance with an incorrect IP source address.
+ []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv6", "-i", m["host_name"], "--ip6-src", "!", fmt.Sprintf("%s/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", IPv6.String()), "-j", "DROP"},
+ []string{"ebtables", "-t", "filter", "-A", "FORWARD", "-p", "IPv6", "-i", m["host_name"], "--ip6-src", "!", fmt.Sprintf("%s/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", IPv6.String()), "-j", "DROP"},
+ )
+ }
+
+ return rules
+}
+
+// matchEbtablesRule compares an active rule to a supplied match rule to see if they match.
+// If deleteMode is true then the "-A" flag in the active rule will be modified to "-D" and will
+// not be part of the equality match. This allows delete commands to be generated from dumped add commands.
+// Note that activeRule is modified in place in delete mode. An active rule with fewer fields
+// than the match rule can never match and is rejected up front, without being modified.
+func (d *nicBridged) matchEbtablesRule(activeRule []string, matchRule []string, deleteMode bool) bool {
+	// Every field of the match rule is compared by index below, so guard against indexing
+	// past the end of a shorter active rule.
+	if len(activeRule) < len(matchRule) {
+		return false
+	}
+
+	for i := range matchRule {
+		// Active rules will be dumped in "add" format, we need to detect
+		// this and switch it to "delete" mode if requested. If this has already been
+		// done then move on, as we don't want to break the comparison below.
+		if deleteMode && (activeRule[i] == "-A" || activeRule[i] == "-D") {
+			activeRule[i] = "-D"
+			continue
+		}
+
+		// Check the match rule field matches the active rule field.
+		// If they don't match, then this isn't one of our rules.
+		if activeRule[i] != matchRule[i] {
+			return false
+		}
+	}
+
+	return true
+}
+
+// setFilters sets up any network level filters defined for the instance.
+// These are controlled by the security.mac_filtering, security.ipv4_filtering and security.ipv6_filtering config keys.
+// The device config must have hwaddr, host_name and parent set. The IPs to filter on are
+// retrieved (or allocated) first, then the ebtables rules followed by the iptables rules are
+// applied. If applying any rule fails, the filters applied so far are removed again.
+func (d *nicBridged) setFilters() (err error) {
+	if d.config["hwaddr"] == "" {
+		return fmt.Errorf("Failed to set network filters: require hwaddr defined")
+	}
+
+	if d.config["host_name"] == "" {
+		return fmt.Errorf("Failed to set network filters: require host_name defined")
+	}
+
+	if d.config["parent"] == "" {
+		return fmt.Errorf("Failed to set network filters: require parent defined")
+	}
+
+	if shared.IsTrue(d.config["security.ipv6_filtering"]) {
+		// Check br_netfilter is loaded and enabled for IPv6.
+		sysctlPath := "bridge/bridge-nf-call-ip6tables"
+		sysctlVal, err := NetworkSysctlGet(sysctlPath)
+		if err != nil {
+			return fmt.Errorf("Error reading net sysctl %s: %v", sysctlPath, err)
+		}
+
+		if sysctlVal != "1\n" {
+			return fmt.Errorf("security.ipv6_filtering requires br_netfilter and sysctl net.bridge.bridge-nf-call-ip6tables=1")
+		}
+	}
+
+	// Retrieve existing IPs, or allocate new ones if needed.
+	IPv4, IPv6, err := d.allocateFilterIPs()
+	if err != nil {
+		// Don't carry on with missing IPs, otherwise the allocation error would be lost and
+		// only part of the requested filters would be applied. No rules have been applied at
+		// this point, so there is nothing to clean up.
+		return err
+	}
+
+	// If anything goes wrong from here on, clean up so we don't leave orphaned rules.
+	// This relies on err being the named return value.
+	defer func() {
+		if err != nil {
+			d.removeFilters(d.config)
+		}
+	}()
+
+	rules := d.generateFilterEbtablesRules(d.config, IPv4, IPv6)
+	for _, rule := range rules {
+		_, err = shared.RunCommand(rule[0], rule[1:]...)
+		if err != nil {
+			return err
+		}
+	}
+
+	rules, err = d.generateFilterIptablesRules(d.config, IPv6)
+	if err != nil {
+		return err
+	}
+
+	// For iptables rules the first field is the protocol and the second the chain.
+	for _, rule := range rules {
+		err = iptables.ContainerPrepend(rule[0], fmt.Sprintf("%s - %s_filtering", d.instance.Name(), rule[0]), "filter", rule[1], rule[2:]...)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// allocateFilterIPs retrieves previously allocated IPs, or allocates new ones if needed.
+//
+// Static addresses set on the device ("ipv4.address"/"ipv6.address") take priority. Otherwise the
+// static allocation recorded in the parent network's dnsmasq host config is reused when it still
+// fits inside the network's subnet and DHCP ranges. Failing that, a new address is allocated, but
+// only for the protocols that have filtering enabled on the device.
+//
+// If the resulting addresses differ from what is recorded in the dnsmasq host config, the host
+// entry is rewritten and dnsmasq.Kill is called for the parent network.
+//
+// Returns the IPv4 and IPv6 addresses (either can be nil if none was configured or allocated) and
+// any error encountered. dnsmasq.ConfigMutex is held from the dnsmasq config read until return.
+func (d *nicBridged) allocateFilterIPs() (net.IP, net.IP, error) {
+ var IPv4, IPv6 net.IP
+
+ // Check if there is a valid static IPv4 address defined.
+ if d.config["ipv4.address"] != "" {
+ IPv4 = net.ParseIP(d.config["ipv4.address"])
+ if IPv4 == nil {
+ return IPv4, IPv6, fmt.Errorf("Invalid static IPv4 address %s", d.config["ipv4.address"])
+ }
+ }
+
+ // Check if there is a valid static IPv6 address defined.
+ if d.config["ipv6.address"] != "" {
+ IPv6 = net.ParseIP(d.config["ipv6.address"])
+ if IPv6 == nil {
+ return IPv4, IPv6, fmt.Errorf("Invalid static IPv6 address %s", d.config["ipv6.address"])
+ }
+ }
+
+ // Everything below reads and possibly rewrites the dnsmasq config files, so serialise access.
+ dnsmasq.ConfigMutex.Lock()
+ defer dnsmasq.ConfigMutex.Unlock()
+
+ // Read current static IP allocation configured from dnsmasq host config (if exists).
+ // A missing host config file is not an error, it just means nothing is allocated yet.
+ curIPv4, curIPv6, err := d.getDHCPStaticIPs(d.config["parent"], d.instance.Name())
+ if err != nil && !os.IsNotExist(err) {
+ return IPv4, IPv6, err
+ }
+
+ _, dbInfo, err := d.state.Cluster.NetworkGet(d.config["parent"])
+ if err != nil {
+ return IPv4, IPv6, err
+ }
+
+ netConfig := dbInfo.Config
+
+ // If no static IPv4, then check if there is a valid static DHCP IPv4 address defined.
+ if IPv4 == nil && curIPv4.IP != nil {
+ _, subnet, err := net.ParseCIDR(netConfig["ipv4.address"])
+ if err != nil {
+ return IPv4, IPv6, err
+ }
+
+ // Check the existing static DHCP IP is still valid in the subnet & ranges, if not
+ // then we'll need to generate a new one.
+ ranges := d.networkDHCPv4Ranges(netConfig)
+ if d.networkDHCPValidIP(subnet, ranges, curIPv4.IP.To4()) {
+ IPv4 = curIPv4.IP.To4()
+ }
+ }
+
+ // If no static IPv6, then check if there is a valid static DHCP IPv6 address defined.
+ if IPv6 == nil && curIPv6.IP != nil {
+ _, subnet, err := net.ParseCIDR(netConfig["ipv6.address"])
+ if err != nil {
+ return IPv4, IPv6, err
+ }
+
+ // Check the existing static DHCP IP is still valid in the subnet & ranges, if not
+ // then we'll need to generate a new one.
+ ranges := d.networkDHCPv6Ranges(netConfig)
+ if d.networkDHCPValidIP(subnet, ranges, curIPv6.IP.To16()) {
+ IPv6 = curIPv6.IP.To16()
+ }
+ }
+
+ // If we need to generate either a new IPv4 or IPv6, load existing IPs used in network.
+ if IPv4 == nil || IPv6 == nil {
+ // Get existing allocations in network.
+ IPv4Allocs, IPv6Allocs, err := d.getDHCPAllocatedIPs(d.config["parent"])
+ if err != nil {
+ return IPv4, IPv6, err
+ }
+
+ // Allocate a new IPv4 address if IPv4 filtering enabled.
+ if IPv4 == nil && shared.IsTrue(d.config["security.ipv4_filtering"]) {
+ IPv4, err = d.getDHCPFreeIPv4(IPv4Allocs, netConfig, d.instance.Name(), d.config["hwaddr"])
+ if err != nil {
+ return IPv4, IPv6, err
+ }
+ }
+
+ // Allocate a new IPv6 address if IPv6 filtering enabled.
+ if IPv6 == nil && shared.IsTrue(d.config["security.ipv6_filtering"]) {
+ IPv6, err = d.getDHCPFreeIPv6(IPv6Allocs, netConfig, d.instance.Name(), d.config["hwaddr"])
+ if err != nil {
+ return IPv4, IPv6, err
+ }
+ }
+ }
+
+ // If either IPv4 or IPv6 assigned is different than what is in dnsmasq config, rebuild config.
+ if (IPv4 != nil && bytes.Compare(curIPv4.IP, IPv4.To4()) != 0) || (IPv6 != nil && bytes.Compare(curIPv6.IP, IPv6.To16()) != 0) {
+ var IPv4Str, IPv6Str string
+
+ if IPv4 != nil {
+ IPv4Str = IPv4.String()
+ }
+
+ if IPv6 != nil {
+ IPv6Str = IPv6.String()
+ }
+
+ err = dnsmasq.UpdateStaticEntry(d.config["parent"], d.instance.Project(), d.instance.Name(), netConfig, d.config["hwaddr"], IPv4Str, IPv6Str)
+ if err != nil {
+ return IPv4, IPv6, err
+ }
+
+ // NOTE(review): presumably this makes dnsmasq pick up the rewritten host entry; confirm
+ // the meaning of the second argument against dnsmasq.Kill.
+ err = dnsmasq.Kill(d.config["parent"], true)
+ if err != nil {
+ return IPv4, IPv6, err
+ }
+ }
+
+ return IPv4, IPv6, nil
+}
+
+// generateFilterIptablesRules builds the ip6tables filter rules needed for the device.
+//
+// Rules are only produced when "security.ipv6_filtering" is enabled on the device and an IPv6
+// address is supplied; otherwise no rules are returned. An unparsable "hwaddr" is an error.
+// Each rule is a string slice laid out as: family, chain, followed by the rule arguments.
+func (d *nicBridged) generateFilterIptablesRules(m config.Device, IPv6 net.IP) (rules [][]string, err error) {
+	hwAddr, err := net.ParseMAC(m["hwaddr"])
+	if err != nil {
+		return nil, err
+	}
+
+	if !shared.IsTrue(m["security.ipv6_filtering"]) || IPv6 == nil {
+		return nil, nil
+	}
+
+	// ebtables cannot look inside an ICMPv6 packet (unlike IPv4 ARP), and ip6tables has no
+	// dedicated match for it either, so the rules compare raw hex at fixed packet offsets.
+	// Without them an instance could send a crafted gratuitous NDP packet carrying the correct
+	// source address and MAC at the IP & ethernet layers but a fraudulent IP or MAC inside the
+	// ICMPv6 NDP payload, and so hijack traffic for an IP that is not assigned to it.
+	dropUnlessMatch := func(chain string, hexValue string, from string, to string) []string {
+		return []string{"ipv6", chain, "-i", m["parent"], "-p", "ipv6-icmp", "-m", "physdev", "--physdev-in", m["host_name"], "-m", "icmp6", "--icmpv6-type", "136", "-m", "string", "!", "--hex-string", fmt.Sprintf("|%s|", hexValue), "--algo", "bm", "--from", from, "--to", to, "-j", "DROP"}
+	}
+
+	addrHex := hex.EncodeToString(IPv6)
+	hwAddrHex := hex.EncodeToString(hwAddr)
+
+	return [][]string{
+		// Prevent Neighbor Advertisement IP spoofing (stops the instance redirecting traffic for IPs that are not its own).
+		dropUnlessMatch("INPUT", addrHex, "48", "64"),
+		dropUnlessMatch("FORWARD", addrHex, "48", "64"),
+		// Prevent Neighbor Advertisement MAC spoofing (stops the instance poisoning its neighbours' NDP cache with a MAC that isn't its own).
+		dropUnlessMatch("INPUT", hwAddrHex, "66", "72"),
+		dropUnlessMatch("FORWARD", hwAddrHex, "66", "72"),
+	}, nil
+}
+
+// networkDHCPv4Ranges parses the network's "ipv4.dhcp.ranges" setting into a list of ranges.
+// The setting is a comma separated list of "start-end" pairs; entries without a "-" are ignored.
+// An empty (non-nil) slice is returned when no ranges are configured.
+func (d *nicBridged) networkDHCPv4Ranges(netConfig map[string]string) []dhcpRange {
+	parsed := []dhcpRange{}
+
+	rawRanges := netConfig["ipv4.dhcp.ranges"]
+	if rawRanges == "" {
+		return parsed
+	}
+
+	for _, rawRange := range strings.Split(rawRanges, ",") {
+		bounds := strings.SplitN(strings.TrimSpace(rawRange), "-", 2)
+		if len(bounds) != 2 {
+			continue
+		}
+
+		// An unparsable bound ends up as a nil IP in the returned range.
+		parsed = append(parsed, dhcpRange{
+			Start: net.ParseIP(bounds[0]).To4(),
+			End:   net.ParseIP(bounds[1]).To4(),
+		})
+	}
+
+	return parsed
+}
+
+// networkDHCPv6Ranges parses the network's "ipv6.dhcp.ranges" setting into a list of ranges.
+// The setting is a comma separated list of "start-end" pairs; entries without a "-" are ignored.
+// An empty (non-nil) slice is returned when no ranges are configured.
+func (d *nicBridged) networkDHCPv6Ranges(netConfig map[string]string) []dhcpRange {
+	parsed := []dhcpRange{}
+
+	rawRanges := netConfig["ipv6.dhcp.ranges"]
+	if rawRanges == "" {
+		return parsed
+	}
+
+	for _, rawRange := range strings.Split(rawRanges, ",") {
+		bounds := strings.SplitN(strings.TrimSpace(rawRange), "-", 2)
+		if len(bounds) != 2 {
+			continue
+		}
+
+		// An unparsable bound ends up as a nil IP in the returned range.
+		parsed = append(parsed, dhcpRange{
+			Start: net.ParseIP(bounds[0]).To16(),
+			End:   net.ParseIP(bounds[1]).To16(),
+		})
+	}
+
+	return parsed
+}
+
+// networkDHCPValidIP reports whether IP may be handed out on the network: it must be inside
+// subnet and, when any DHCP ranges are supplied, also inside at least one of them (range bounds
+// are inclusive).
+func (d *nicBridged) networkDHCPValidIP(subnet *net.IPNet, ranges []dhcpRange, IP net.IP) bool {
+	if !subnet.Contains(IP) {
+		return false
+	}
+
+	// No custom ranges means the whole subnet is usable.
+	if len(ranges) == 0 {
+		return true
+	}
+
+	for _, candidate := range ranges {
+		if bytes.Compare(IP, candidate.Start) < 0 || bytes.Compare(IP, candidate.End) > 0 {
+			continue
+		}
+
+		return true
+	}
+
+	return false
+}
+
+// getDHCPAllocatedIPs returns the IPs currently allocated (statically and dynamically) in dnsmasq
+// for a specific network, as one map for IPv4 and one for IPv6. The IPv4 map is keyed by a 4 byte
+// array and the IPv6 map by a 16 byte array, each holding the address in net.IP byte order.
+// The value of each map item is a dhcpAllocation struct containing at least
+// whether the allocation was static or dynamic and optionally instance name or MAC address.
+// MAC addresses are only included for dynamic IPv4 allocations (where name is not reliable).
+// Static allocations are not overridden by dynamic allocations, allowing for instance name to be
+// included for static IPv6 allocations. IPv6 addresses that are dynamically assigned cannot be
+// reliably linked to instances using either name or MAC because dnsmasq does not record the MAC
+// address for these records, and the recorded host name can be set by the instance if the dns.mode
+// for the network is set to "dynamic" and so cannot be trusted, so in this case we do not return
+// any identifying info.
+//
+// An error is returned if the network's dnsmasq.hosts directory or dnsmasq.leases file cannot be
+// read, or if a lease line contains an unparsable IP or (for IPv4 leases) MAC address.
+func (d *nicBridged) getDHCPAllocatedIPs(network string) (map[[4]byte]dhcpAllocation, map[[16]byte]dhcpAllocation, error) {
+ IPv4s := make(map[[4]byte]dhcpAllocation)
+ IPv6s := make(map[[16]byte]dhcpAllocation)
+
+ // First read all statically allocated IPs.
+ files, err := ioutil.ReadDir(shared.VarPath("networks", network, "dnsmasq.hosts"))
+ if err != nil {
+ return IPv4s, IPv6s, err
+ }
+
+ // Each entry in the hosts directory is looked up by its file name.
+ for _, entry := range files {
+ IPv4, IPv6, err := d.getDHCPStaticIPs(network, entry.Name())
+ if err != nil {
+ return IPv4s, IPv6s, err
+ }
+
+ if IPv4.IP != nil {
+ var IPKey [4]byte
+ copy(IPKey[:], IPv4.IP.To4())
+ IPv4s[IPKey] = IPv4
+ }
+
+ if IPv6.IP != nil {
+ var IPKey [16]byte
+ copy(IPKey[:], IPv6.IP.To16())
+ IPv6s[IPKey] = IPv6
+ }
+ }
+
+ // Next read all dynamic allocated IPs.
+ file, err := os.Open(shared.VarPath("networks", network, "dnsmasq.leases"))
+ if err != nil {
+ return IPv4s, IPv6s, err
+ }
+ defer file.Close()
+
+ scanner := bufio.NewScanner(file)
+ for scanner.Scan() {
+ fields := strings.Fields(scanner.Text())
+ // Only lines with exactly 5 fields are treated as leases; field 2 is the leased IP.
+ if len(fields) == 5 {
+ IP := net.ParseIP(fields[2])
+ if IP == nil {
+ return IPv4s, IPv6s, fmt.Errorf("Error parsing IP address: %v", fields[2])
+ }
+
+ // Handle IPv6 addresses.
+ if IP.To4() == nil {
+ var IPKey [16]byte
+ copy(IPKey[:], IP.To16())
+
+ // Don't replace IPs from static config as more reliable.
+ // An existing entry is recognised by it having a name set.
+ if IPv6s[IPKey].Name != "" {
+ continue
+ }
+
+ IPv6s[IPKey] = dhcpAllocation{
+ Static: false,
+ IP: IP.To16(),
+ }
+ } else {
+ // MAC only available in IPv4 leases.
+ MAC, err := net.ParseMAC(fields[1])
+ if err != nil {
+ return IPv4s, IPv6s, err
+ }
+
+ var IPKey [4]byte
+ copy(IPKey[:], IP.To4())
+
+ // Don't replace IPs from static config as more reliable.
+ // An existing entry is recognised by it having a name set.
+ if IPv4s[IPKey].Name != "" {
+ continue
+ }
+
+ IPv4s[IPKey] = dhcpAllocation{
+ MAC: MAC,
+ Static: false,
+ IP: IP.To4(),
+ }
+ }
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return IPv4s, IPv6s, err
+ }
+
+ return IPv4s, IPv6s, nil
+}
+
+// getDHCPFreeIPv4 attempts to find a free IPv4 address for the device.
+// It first checks whether there is an existing allocation for the instance (matched by instance
+// name or by MAC address) that is still valid for the network's subnet and DHCP ranges.
+// If no previous allocation, then a free IP is picked from the ranges configured, or from the
+// whole subnet pool when no custom ranges are configured.
+//
+// usedIPs is the set of addresses already allocated in the network, netConfig is the network's
+// config, ctName the instance name and deviceMAC the device's MAC address in string form.
+// Range bounds are inclusive, so the last address of a range can be allocated too. Ranges with an
+// unparsable bound are skipped. LXD's own address on the network is never returned.
+// An error is returned if deviceMAC or the network's "ipv4.address" cannot be parsed, or if no
+// free address is left.
+func (d *nicBridged) getDHCPFreeIPv4(usedIPs map[[4]byte]dhcpAllocation, netConfig map[string]string, ctName string, deviceMAC string) (net.IP, error) {
+	MAC, err := net.ParseMAC(deviceMAC)
+	if err != nil {
+		return nil, err
+	}
+
+	lxdIP, subnet, err := net.ParseCIDR(netConfig["ipv4.address"])
+	if err != nil {
+		return nil, err
+	}
+
+	dhcpRanges := d.networkDHCPv4Ranges(netConfig)
+
+	// Lets see if there is already an allocation for our device and that it sits within subnet.
+	// If there are custom DHCP ranges defined, check also that the IP falls within one of the ranges.
+	for _, DHCP := range usedIPs {
+		if (ctName == DHCP.Name || bytes.Equal(MAC, DHCP.MAC)) && d.networkDHCPValidIP(subnet, dhcpRanges, DHCP.IP) {
+			return DHCP.IP, nil
+		}
+	}
+
+	// If no custom ranges defined, convert subnet pool to a range.
+	if len(dhcpRanges) == 0 {
+		dhcpRanges = append(dhcpRanges, dhcpRange{Start: d.networkGetIP(subnet, 1).To4(), End: d.networkGetIP(subnet, -2).To4()})
+	}
+
+	// If no valid existing allocation found, try and find a free one in the subnet pool/ranges.
+	inc := big.NewInt(1)
+	for _, IPRange := range dhcpRanges {
+		// A bound that failed to parse would make the scan start from (or stop at) zero.
+		if IPRange.Start == nil || IPRange.End == nil {
+			continue
+		}
+
+		curBig := new(big.Int).SetBytes(IPRange.Start)
+		endBig := new(big.Int).SetBytes(IPRange.End)
+
+		// The end of the range is inclusive (as in networkDHCPValidIP), so use <= here.
+		for ; curBig.Cmp(endBig) <= 0; curBig.Add(curBig, inc) {
+			IP := net.IP(curBig.Bytes())
+
+			// Check IP generated is not LXD's IP.
+			if IP.Equal(lxdIP) {
+				continue
+			}
+
+			// Check IP is not already allocated.
+			var IPKey [4]byte
+			copy(IPKey[:], IP.To4())
+			if _, inUse := usedIPs[IPKey]; inUse {
+				continue
+			}
+
+			return IP, nil
+		}
+	}
+
+	return nil, fmt.Errorf("No available IP could not be found")
+}
+
+// getDHCPFreeIPv6 attempts to find a free IPv6 address for the device.
+// It first checks whether there is an existing allocation for the instance. Due to the limitations
+// of dnsmasq lease file format, we can only search for previous static allocations.
+// If no previous allocation, then if SLAAC (stateless) mode is enabled on the network, or if
+// DHCPv6 stateful mode is enabled without custom ranges, then an EUI64 IP is generated from the
+// device's MAC address. Finally if stateful custom ranges are enabled (or the EUI64 IP is already
+// taken), then a free IP is picked from the ranges configured, or from the whole subnet pool when
+// no custom ranges are configured.
+//
+// usedIPs is the set of addresses already allocated in the network, netConfig is the network's
+// config, ctName the instance name and deviceMAC the device's MAC address in string form.
+// Range bounds are inclusive, so the last address of a range can be allocated too. Ranges with an
+// unparsable bound are skipped. LXD's own address on the network is never returned.
+// An error is returned if the network's "ipv6.address" or deviceMAC cannot be parsed, if the EUI64
+// address cannot be generated, or if no free address is left.
+func (d *nicBridged) getDHCPFreeIPv6(usedIPs map[[16]byte]dhcpAllocation, netConfig map[string]string, ctName string, deviceMAC string) (net.IP, error) {
+	lxdIP, subnet, err := net.ParseCIDR(netConfig["ipv6.address"])
+	if err != nil {
+		return nil, err
+	}
+
+	dhcpRanges := d.networkDHCPv6Ranges(netConfig)
+
+	// Lets see if there is already an allocation for our device and that it sits within subnet.
+	// Because of dnsmasq's lease file format we can only match safely against static
+	// allocations using instance name. If there are custom DHCP ranges defined, check also
+	// that the IP falls within one of the ranges.
+	for _, DHCP := range usedIPs {
+		if ctName == DHCP.Name && d.networkDHCPValidIP(subnet, dhcpRanges, DHCP.IP) {
+			return DHCP.IP, nil
+		}
+	}
+
+	// Try using an EUI64 IP when in either SLAAC or DHCPv6 stateful mode without custom ranges.
+	if !shared.IsTrue(netConfig["ipv6.dhcp.stateful"]) || netConfig["ipv6.dhcp.ranges"] == "" {
+		MAC, err := net.ParseMAC(deviceMAC)
+		if err != nil {
+			return nil, err
+		}
+
+		IP, err := eui64.ParseMAC(subnet.IP, MAC)
+		if err != nil {
+			return nil, err
+		}
+
+		// Check IP is not already allocated and not the LXD IP.
+		var IPKey [16]byte
+		copy(IPKey[:], IP.To16())
+		_, inUse := usedIPs[IPKey]
+		if !inUse && !IP.Equal(lxdIP) {
+			return IP, nil
+		}
+	}
+
+	// If no custom ranges defined, convert subnet pool to a range.
+	if len(dhcpRanges) == 0 {
+		dhcpRanges = append(dhcpRanges, dhcpRange{Start: d.networkGetIP(subnet, 1).To16(), End: d.networkGetIP(subnet, -1).To16()})
+	}
+
+	// If we get here, then someone already has our SLAAC IP, or we are using custom ranges.
+	// Try and find a free one in the subnet pool/ranges.
+	inc := big.NewInt(1)
+	for _, IPRange := range dhcpRanges {
+		// A bound that failed to parse would make the scan start from (or stop at) zero.
+		if IPRange.Start == nil || IPRange.End == nil {
+			continue
+		}
+
+		curBig := new(big.Int).SetBytes(IPRange.Start)
+		endBig := new(big.Int).SetBytes(IPRange.End)
+
+		// The end of the range is inclusive (as in networkDHCPValidIP), so use <= here.
+		for ; curBig.Cmp(endBig) <= 0; curBig.Add(curBig, inc) {
+			IP := net.IP(curBig.Bytes())
+
+			// Check IP generated is not LXD's IP.
+			if IP.Equal(lxdIP) {
+				continue
+			}
+
+			// Check IP is not already allocated.
+			var IPKey [16]byte
+			copy(IPKey[:], IP.To16())
+			if _, inUse := usedIPs[IPKey]; inUse {
+				continue
+			}
+
+			return IP, nil
+		}
+	}
+
+	return nil, fmt.Errorf("No available IP could not be found")
+}
+
+// networkGetIP returns the IP at offset host within subnet. A negative host is counted back from
+// the end of the subnet: it is added to the subnet's total address count before being applied.
+// IPv4 subnets yield a 4 byte IP; for IPv6 subnets the raw big-endian bytes of the computed value
+// are returned.
+func (d *nicBridged) networkGetIP(subnet *net.IPNet, host int64) net.IP {
+ // Convert IP to a big int
+ bigIP := big.NewInt(0)
+ bigIP.SetBytes(subnet.IP.To16())
+
+ // Deal with negative offsets
+ bigHost := big.NewInt(host)
+ bigCount := big.NewInt(host)
+ if host < 0 {
+ mask, size := subnet.Mask.Size()
+
+ // Number of addresses in the subnet: 2^(address bits - prefix length).
+ bigHosts := big.NewFloat(0)
+ bigHosts.SetFloat64((math.Pow(2, float64(size-mask))))
+ bigHostsInt, _ := bigHosts.Int(nil)
+
+ bigCount.Set(bigHostsInt)
+ bigCount.Add(bigCount, bigHost)
+ }
+
+ // Get the new IP int
+ bigIP.Add(bigIP, bigCount)
+
+ // Generate an IPv6
+ if subnet.IP.To4() == nil {
+ newIP := bigIP.Bytes()
+ return newIP
+ }
+
+ // Generate an IPv4
+ // Only the low 32 bits are kept, which drops the prefix added by To16() above.
+ newIP := make(net.IP, 4)
+ binary.BigEndian.PutUint32(newIP, uint32(bigIP.Int64()))
+ return newIP
+}
+
+// Modes accepted by networkClearLease, selecting which address families have their leases
+// released: both, IPv4 only, or IPv6 only.
+const (
+ clearLeaseAll = iota
+ clearLeaseIPv4Only
+ clearLeaseIPv6Only
+)
+
+// networkClearLease clears leases from a running dnsmasq process.
+//
+// It scans the network's dnsmasq.leases file and sends a DHCP release packet to the host's
+// address on the network interface for every lease belonging to the instance. IPv4 leases are
+// matched by hwaddr, IPv6 leases by the instance name. mode is one of the clearLease* constants
+// and limits which address families are released.
+//
+// Returns nil without doing anything if the lease file does not exist. Failures to release
+// individual leases do not stop the scan; they are collected and returned together as one error.
+func (d *nicBridged) networkClearLease(name string, network string, hwaddr string, mode int) error {
+ leaseFile := shared.VarPath("networks", network, "dnsmasq.leases")
+
+ // Check that we are in fact running a dnsmasq for the network
+ if !shared.PathExists(leaseFile) {
+ return nil
+ }
+
+ // Convert MAC string to bytes to avoid any case comparison issues later.
+ srcMAC, err := net.ParseMAC(hwaddr)
+ if err != nil {
+ return err
+ }
+
+ iface, err := net.InterfaceByName(network)
+ if err != nil {
+ return err
+ }
+
+ // Get IPv4 and IPv6 address of interface running dnsmasq on host.
+ addrs, err := iface.Addrs()
+ if err != nil {
+ return err
+ }
+
+ // Only global unicast addresses are considered; if the interface has several of one family
+ // the last one listed is used.
+ var dstIPv4, dstIPv6 net.IP
+ for _, addr := range addrs {
+ ip, _, err := net.ParseCIDR(addr.String())
+ if err != nil {
+ return err
+ }
+ if !ip.IsGlobalUnicast() {
+ continue
+ }
+ if ip.To4() == nil {
+ dstIPv6 = ip
+ } else {
+ dstIPv4 = ip
+ }
+ }
+
+ // Iterate the dnsmasq leases file looking for matching leases for this instance to release.
+ file, err := os.Open(leaseFile)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ var dstDUID string
+ errs := []error{}
+ scanner := bufio.NewScanner(file)
+ for scanner.Scan() {
+ fields := strings.Fields(scanner.Text())
+ fieldsLen := len(fields)
+
+ // Handle lease lines
+ if fieldsLen == 5 {
+ if (mode == clearLeaseAll || mode == clearLeaseIPv4Only) && srcMAC.String() == fields[1] { // Handle IPv4 leases by matching MAC address to lease.
+ srcIP := net.ParseIP(fields[2])
+
+ if dstIPv4 == nil {
+ errs = append(errs, fmt.Errorf("Failed to release DHCPv4 lease for instance \"%s\", IP \"%s\", MAC \"%s\", %v", name, srcIP, srcMAC, "No server address found"))
+ continue
+ }
+
+ err = d.networkDHCPv4Release(srcMAC, srcIP, dstIPv4)
+ if err != nil {
+ errs = append(errs, fmt.Errorf("Failed to release DHCPv4 lease for instance \"%s\", IP \"%s\", MAC \"%s\", %v", name, srcIP, srcMAC, err))
+ }
+ } else if (mode == clearLeaseAll || mode == clearLeaseIPv6Only) && name == fields[3] { // Handle IPv6 addresses by matching hostname to lease.
+ // For these lines field 1 is used as the IAID and field 4 as the client DUID.
+ IAID := fields[1]
+ srcIP := net.ParseIP(fields[2])
+ DUID := fields[4]
+
+ // Skip IPv4 addresses.
+ if srcIP.To4() != nil {
+ continue
+ }
+
+ if dstIPv6 == nil {
+ errs = append(errs, fmt.Errorf("Failed to release DHCPv6 lease for instance \"%s\", IP \"%s\", DUID \"%s\", IAID \"%s\": %s", name, srcIP, DUID, IAID, "No server address found"))
+ continue // Cant send release packet if no dstIP found.
+ }
+
+ if dstDUID == "" {
+ errs = append(errs, fmt.Errorf("Failed to release DHCPv6 lease for instance \"%s\", IP \"%s\", DUID \"%s\", IAID \"%s\": %s", name, srcIP, DUID, IAID, "No server DUID found"))
+ continue // Cant send release packet if no dstDUID found.
+ }
+
+ err = d.networkDHCPv6Release(DUID, IAID, srcIP, dstIPv6, dstDUID)
+ if err != nil {
+ errs = append(errs, fmt.Errorf("Failed to release DHCPv6 lease for instance \"%s\", IP \"%s\", DUID \"%s\", IAID \"%s\": %v", name, srcIP, DUID, IAID, err))
+ }
+ }
+ } else if fieldsLen == 2 && fields[0] == "duid" {
+ // Handle server DUID line needed for releasing IPv6 leases.
+ // This should come before the IPv6 leases in the lease file.
+ dstDUID = fields[1]
+ }
+ }
+
+ // Release failures take precedence over a scanner error, which is only reported when
+ // no release failed.
+ if len(errs) > 0 {
+ return fmt.Errorf("%v", errs)
+ }
+
+ if err := scanner.Err(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// networkDHCPv4Release builds a DHCPv4 RELEASE message on behalf of the client identified by
+// srcMAC and srcIP, and sends it over UDP to port 67 of the DHCP server at dstIP.
+func (d *nicBridged) networkDHCPv4Release(srcMAC net.HardwareAddr, srcIP net.IP, dstIP net.IP) error {
+	serverAddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:67", dstIP.String()))
+	if err != nil {
+		return err
+	}
+
+	sock, err := net.DialUDP("udp", nil, serverAddr)
+	if err != nil {
+		return err
+	}
+	defer sock.Close()
+
+	// The packet claims to come from the supplied client IP and MAC, and uses a random
+	// DHCP transaction ID.
+	release := layers.DHCPv4{
+		Operation:    layers.DHCPOpRequest,
+		HardwareType: layers.LinkTypeEthernet,
+		ClientHWAddr: srcMAC,
+		ClientIP:     srcIP,
+		Xid:          rand.Uint32(),
+	}
+
+	// Mark the message as a release addressed to this particular server.
+	release.Options = append(release.Options,
+		layers.NewDHCPOption(layers.DHCPOptMessageType, []byte{byte(layers.DHCPMsgTypeRelease)}),
+		layers.NewDHCPOption(layers.DHCPOptServerID, dstIP.To4()),
+	)
+
+	packet := gopacket.NewSerializeBuffer()
+	serializeOpts := gopacket.SerializeOptions{
+		ComputeChecksums: true,
+		FixLengths:       true,
+	}
+
+	err = gopacket.SerializeLayers(packet, serializeOpts, &release)
+	if err != nil {
+		return err
+	}
+
+	_, err = sock.Write(packet.Bytes())
+	return err
+}
+
+// networkDHCPv6Release builds a DHCPv6 RELEASE message for the lease identified by the client
+// DUID (srcDUID), IAID (srcIAID) and address (srcIP), and sends it over UDP to port 547 of the
+// DHCP server at dstIP whose DUID is dstDUID. Both DUIDs are given as hex strings, optionally
+// colon separated, and the IAID as a decimal string.
+func (d *nicBridged) networkDHCPv6Release(srcDUID string, srcIAID string, srcIP net.IP, dstIP net.IP, dstDUID string) error {
+	serverAddr, err := net.ResolveUDPAddr("udp6", fmt.Sprintf("[%s]:547", dstIP.String()))
+	if err != nil {
+		return err
+	}
+
+	sock, err := net.DialUDP("udp6", nil, serverAddr)
+	if err != nil {
+		return err
+	}
+	defer sock.Close()
+
+	release := layers.DHCPv6{MsgType: layers.DHCPv6MsgTypeRelease}
+
+	// Server DUID: strip the colons and decode the hex into raw bytes.
+	serverDUID, err := hex.DecodeString(strings.Replace(dstDUID, ":", "", -1))
+	if err != nil {
+		return err
+	}
+
+	// Client DUID: same treatment.
+	clientDUID, err := hex.DecodeString(strings.Replace(srcDUID, ":", "", -1))
+	if err != nil {
+		return err
+	}
+
+	iaid, err := strconv.Atoi(srcIAID)
+	if err != nil {
+		return err
+	}
+
+	// gopacket has no builder for the Identity Association option, so assemble it by hand.
+	iana := d.networkDHCPv6CreateIANA(iaid, d.networkDHCPv6CreateIAAddress(srcIP))
+
+	release.Options = append(release.Options,
+		layers.NewDHCPv6Option(layers.DHCPv6OptServerID, serverDUID),
+		layers.NewDHCPv6Option(layers.DHCPv6OptClientID, clientDUID),
+		layers.NewDHCPv6Option(layers.DHCPv6OptIANA, iana),
+	)
+
+	packet := gopacket.NewSerializeBuffer()
+	serializeOpts := gopacket.SerializeOptions{
+		ComputeChecksums: true,
+		FixLengths:       true,
+	}
+
+	err = gopacket.SerializeLayers(packet, serializeOpts, &release)
+	if err != nil {
+		return err
+	}
+
+	_, err = sock.Write(packet.Bytes())
+	return err
+}
+
+// networkDHCPv6CreateIANA creates the payload of a DHCPv6 Identity Association for Non-temporary
+// Address (rfc3315 IA_NA) option: a 12 byte header holding the IAID followed by zeroed T1 and T2
+// values, with the supplied IA Address option appended after it.
+func (d *nicBridged) networkDHCPv6CreateIANA(IAID int, IAAddr []byte) []byte {
+	// Bytes 4-8 (T1) and 8-12 (T2) are left at the zero value make() gives them.
+	header := make([]byte, 12)
+	binary.BigEndian.PutUint32(header[0:4], uint32(IAID))
+
+	return append(header, IAAddr...)
+}
+
+// networkDHCPv6CreateIAAddress creates a 28 byte DHCPv6 Identity Association Address (rfc3315)
+// option for IP: option type, a fixed length of 24, the address to be released, and zeroed
+// preferred and valid lifetimes.
+func (d *nicBridged) networkDHCPv6CreateIAAddress(IP net.IP) []byte {
+	option := make([]byte, 28)
+
+	binary.BigEndian.PutUint16(option[0:2], uint16(layers.DHCPv6OptIAAddr))
+	binary.BigEndian.PutUint16(option[2:4], 24)
+	copy(option[4:20], IP)
+
+	// Bytes 20-24 (preferred lifetime) and 24-28 (valid lifetime) stay zero from make().
+	return option
+}
diff --git a/test/suites/container_devices_nic_bridged.sh b/test/suites/container_devices_nic_bridged.sh
index b9ccac54dc..d385f0d5c9 100644
--- a/test/suites/container_devices_nic_bridged.sh
+++ b/test/suites/container_devices_nic_bridged.sh
@@ -23,6 +23,9 @@ test_container_devices_nic_bridged() {
lxc network set "${brName}" ipv6.routes 2001:db8::3:0/64
[ "$(cat /sys/class/net/${brName}/address)" = "00:11:22:33:44:55" ]
+ # Record how many nics we started with.
+ startNicCount=$(find /sys/class/net | wc -l)
+
# Test pre-launch profile config is applied at launch
lxc profile copy default "${ctName}"
lxc profile device set "${ctName}" eth0 ipv4.routes "192.0.2.1${ipRand}/32"
@@ -259,7 +262,7 @@ test_container_devices_nic_bridged() {
lxc launch testimage "${ctName}" -p "${ctName}"
# Request DHCPv4 lease with custom name (to check managed name is allocated instead).
- lxc exec "${ctName}" -- /sbin/udhcpc -i eth0 -F "${ctName}custom"
+ lxc exec "${ctName}" -- udhcpc -i eth0 -F "${ctName}custom"
# Check DHCPv4 lease is allocated.
if ! grep -i "${ctMAC}" "${LXD_DIR}/networks/${brName}/dnsmasq.leases" ; then
@@ -273,15 +276,30 @@ test_container_devices_nic_bridged() {
false
fi
+ # Request DHCPv6 lease (if udhcpc6 is in busybox image).
+ busyboxUdhcpc6=$(lxc exec "${ctName}" -- busybox --list | grep udhcpc6)
+ if [ "${busyboxUdhcpc6}" = "udhcpc6" ]; then
+ lxc exec "${ctName}" -- udhcpc6 -i eth0
+ fi
+
# Delete container, check LXD releases lease.
lxc delete "${ctName}" -f
- # Check DHCPv4 lease is released.
- if grep -i "${ctMAC}" "${LXD_DIR}/networks/${brName}/dnsmasq.leases" ; then
+ # Check DHCPv4 lease is released (space before the MAC important to avoid mismatching IPv6 lease).
+ if grep -i " ${ctMAC}" "${LXD_DIR}/networks/${brName}/dnsmasq.leases" ; then
echo "DHCPv4 lease not released"
false
fi
+ # Wait for DHCPv6 release to be processed.
+ sleep 1
+
+ # Check DHCPv6 lease is released.
+ if grep -i " ${ctName}" "${LXD_DIR}/networks/${brName}/dnsmasq.leases" ; then
+ echo "DHCPv6 lease not released"
+ false
+ fi
+
# Check dnsmasq host config file is removed.
if [ -f "${LXD_DIR}/networks/${brName}/dnsmasq.hosts/${ctName}" ] ; then
echo "dnsmasq host config file not removed"
@@ -316,6 +334,29 @@ test_container_devices_nic_bridged() {
false
fi
+ lxc config device add "${ctName}" eth0 nic nictype=bridged parent="${brName}" name=eth0
+ if [ ! -f "${LXD_DIR}/networks/${brName}/dnsmasq.hosts/${ctName}" ] ; then
+ echo "dnsmasq host config file not created"
+ false
+ fi
+
+ # Check connecting device to a non-managed bridge.
+ ip link add "${ctName}" type dummy
+ lxc config device set "${ctName}" eth0 parent "${ctName}"
+ if [ -f "${LXD_DIR}/networks/${brName}/dnsmasq.hosts/${ctName}" ] ; then
+ echo "dnsmasq host config file not removed from old network"
+ false
+ fi
+
+ ip link delete "${ctName}"
+
+ # Check we haven't left any NICS lying around.
+ endNicCount=$(find /sys/class/net | wc -l)
+ if [ "$startNicCount" != "$endNicCount" ]; then
+ echo "leftover NICS detected"
+ false
+ fi
+
# Cleanup.
lxc delete "${ctName}" -f
lxc network delete "${brName}"
diff --git a/test/suites/container_devices_nic_bridged_filtering.sh b/test/suites/container_devices_nic_bridged_filtering.sh
index 105e05eb58..d36c8d8838 100644
--- a/test/suites/container_devices_nic_bridged_filtering.sh
+++ b/test/suites/container_devices_nic_bridged_filtering.sh
@@ -22,6 +22,9 @@ test_container_devices_nic_bridged_filtering() {
lxc network set "${brName}" ipv6.address 2001:db8::1/64
[ "$(cat /sys/class/net/${brName}/address)" = "00:11:22:33:44:55" ]
+ # Record how many nics we started with.
+ startNicCount=$(find /sys/class/net | wc -l)
+
# Create profile for new containers.
lxc profile copy default "${ctPrefix}"
lxc profile device set "${ctPrefix}" eth0 parent "${brName}"
@@ -56,7 +59,6 @@ test_container_devices_nic_bridged_filtering() {
# Setup fake MAC inside container.
lxc exec "${ctPrefix}A" -- ip link set dev eth0 address 00:11:22:33:44:56 up
- lxc exec "${ctPrefix}A" -- ip a add 192.0.2.2/24 dev eth0
# Check that ping is no longer working (i.e its filtered after fake MAC setup).
if lxc exec "${ctPrefix}A" -- ping -c2 -W1 192.0.2.1; then
@@ -111,7 +113,7 @@ test_container_devices_nic_bridged_filtering() {
# Check DHCPv4 allocation still works.
lxc exec "${ctPrefix}A" -- ip link set dev eth0 address "${ctAMAC}" up
- lxc exec "${ctPrefix}A" -- /sbin/udhcpc -i eth0 -n
+ lxc exec "${ctPrefix}A" -- udhcpc -i eth0 -n
lxc exec "${ctPrefix}A" -- ip a flush dev eth0
lxc exec "${ctPrefix}A" -- ip a add 192.0.2.2/24 dev eth0
@@ -137,12 +139,12 @@ test_container_devices_nic_bridged_filtering() {
# Stop CT A and check filters are cleaned up.
lxc stop -f "${ctPrefix}A"
- if ebtables -L --Lmac2 --Lx | grep -e "192.0.2.2" ; then
- echo "IPv4 filter still applied as part of ipv4_filtering in ebtables"
+ if ebtables -L --Lmac2 --Lx | grep -e "${ctAHost}" ; then
+ echo "IP filter still applied as part of ipv4_filtering in ebtables"
false
fi
- # Remove static IP and check IP filter works with previous DHCP release.
+ # Remove static IP and check IP filter works with previous DHCP lease.
rm "${LXD_DIR}/networks/${brName}/dnsmasq.hosts/${ctPrefix}A"
lxc config device unset "${ctPrefix}A" eth0 ipv4.address
lxc start "${ctPrefix}A"
@@ -228,8 +230,15 @@ test_container_devices_nic_bridged_filtering() {
false
fi
- # TODO: Cannot test DHCPv6 as busybox doesn't contain a DHCPv6 client yet.
+ # Check DHCPv6 allocation still works (if udhcpc6 is in busybox image).
lxc exec "${ctPrefix}A" -- ip link set dev eth0 address "${ctAMAC}" up
+
+ busyboxUdhcpc6=$(lxc exec "${ctPrefix}A" -- busybox --list | grep udhcpc6)
+ if [ "${busyboxUdhcpc6}" = "udhcpc6" ]; then
+ lxc exec "${ctPrefix}A" -- udhcpc6 -i eth0 -n
+ fi
+
+ lxc exec "${ctPrefix}A" -- ip -6 a flush dev eth0
lxc exec "${ctPrefix}A" -- ip -6 a add 2001:db8::2/64 dev eth0
sleep 2 # Wait for DAD.
@@ -255,8 +264,8 @@ test_container_devices_nic_bridged_filtering() {
# Stop CT A and check filters are cleaned up.
lxc stop -f "${ctPrefix}A"
- if ebtables -L --Lmac2 --Lx | grep -e "2001:db8::2" ; then
- echo "IPv6 filter still applied as part of ipv6_filtering in ebtables"
+ if ebtables -L --Lmac2 --Lx | grep -e "${ctAHost}" ; then
+ echo "IP filter still applied as part of ipv6_filtering in ebtables"
false
fi
@@ -288,6 +297,16 @@ test_container_devices_nic_bridged_filtering() {
false
fi
+ lxc stop -f "${ctPrefix}A"
+ lxc stop -f "${ctPrefix}B"
+
+ # Check we haven't left any NICS lying around.
+ endNicCount=$(find /sys/class/net | wc -l)
+ if [ "$startNicCount" != "$endNicCount" ]; then
+ echo "leftover NICS detected"
+ false
+ fi
+
lxc delete -f "${ctPrefix}A"
lxc delete -f "${ctPrefix}B"
lxc network delete "${brName}"
From d0f311cbd0cbea56a4c0ab30a25de34cb447af65 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 11:44:25 +0100
Subject: [PATCH 20/32] device/nic/physical: Adds physical NIC device
implementation
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_physical.go | 116 ++++++++++++++++++
test/suites/container_devices_nic_physical.sh | 10 ++
2 files changed, 126 insertions(+)
create mode 100644 lxd/device/nic_physical.go
diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go
new file mode 100644
index 0000000000..21a4380ca3
--- /dev/null
+++ b/lxd/device/nic_physical.go
@@ -0,0 +1,116 @@
+package device
+
+import (
+ "fmt"
+
+ "github.com/lxc/lxd/lxd/instance"
+ "github.com/lxc/lxd/shared"
+)
+
+// nicPhysical is the physical NIC device implementation. It embeds deviceCommon and adds no
+// fields of its own.
+type nicPhysical struct {
+ deviceCommon
+}
+
+// validate checks the supplied config for correctness: the device is only supported on
+// containers, needs a "parent" property, and may only use the options known to this device type.
+func (d *nicPhysical) validate() error {
+	if d.instance.Type() != instance.TypeContainer {
+		return ErrUnsupportedDevType
+	}
+
+	if d.config["parent"] == "" {
+		return fmt.Errorf("Requires parent property")
+	}
+
+	validKeys := []string{"type", "nictype", "parent", "name", "mtu", "hwaddr", "vlan", "maas.subnet.ipv4", "maas.subnet.ipv6"}
+	for key := range d.config {
+		if shared.StringInSlice(key, validKeys) {
+			continue
+		}
+
+		return fmt.Errorf("Invalid device option: %s", key)
+	}
+
+	return nil
+}
+
+// Start is run when the device is added to the container.
+//
+// It resolves the host device for the parent/vlan combination (creating a VLAN device if
+// needed), snapshots the properties of a pre-existing device so they can be restored on stop,
+// applies the configured MAC address and MTU, stores the collected state as volatile config and
+// returns the network interface settings to pass to the container.
+//
+// If an error occurs after a VLAN device was created, that device is removed again before
+// returning.
+func (d *nicPhysical) Start() (*RunConfig, error) {
+	if d.config["name"] == "" {
+		return nil, fmt.Errorf("Requires name property to start")
+	}
+
+	saveData := make(map[string]string)
+
+	// Record the host_name device used for restoration later.
+	saveData["host_name"] = NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+	createdDev, err := NetworkCreateVlanDeviceIfNeeded(d.config["parent"], saveData["host_name"], d.config["vlan"])
+	if err != nil {
+		return nil, err
+	}
+
+	// Record whether we created this device or not so it can be removed on stop.
+	saveData["last_state.created"] = fmt.Sprintf("%t", createdDev)
+
+	// If we return from this function with an error, ensure we clean up created device.
+	// This reads the function-level err, so every failure path below must assign to it with
+	// "=" rather than declaring a new err with ":=".
+	defer func() {
+		if err != nil && createdDev {
+			NetworkRemoveInterface(saveData["host_name"])
+		}
+	}()
+
+	// If we didn't create the device we should track various properties so we can
+	// restore them when the container is stopped or the device is detached.
+	if !createdDev {
+		err = networkSnapshotPhysicalNic(saveData["host_name"], saveData)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	// Set the MAC address.
+	if d.config["hwaddr"] != "" {
+		_, err = shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"])
+		if err != nil {
+			err = fmt.Errorf("Failed to set the MAC address: %s", err)
+			return nil, err
+		}
+	}
+
+	// Set the MTU.
+	if d.config["mtu"] != "" {
+		_, err = shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"])
+		if err != nil {
+			err = fmt.Errorf("Failed to set the MTU: %s", err)
+			return nil, err
+		}
+	}
+
+	err = d.volatileSet(saveData)
+	if err != nil {
+		return nil, err
+	}
+
+	runConf := RunConfig{}
+	runConf.NetworkInterfaces = [][]RunConfigItem{{
+		{Key: "name", Value: d.config["name"]},
+		{Key: "type", Value: "phys"},
+		{Key: "flags", Value: "up"},
+		{Key: "link", Value: saveData["host_name"]},
+	}}
+
+	return &runConf, nil
+}
+
+// Stop is run when the device is removed from the container. It restores the host device from
+// the stored volatile state and, whatever the outcome, clears the volatile keys afterwards.
+func (d *nicPhysical) Stop() error {
+	clearedState := map[string]string{
+		"host_name":          "",
+		"last_state.hwaddr":  "",
+		"last_state.mtu":     "",
+		"last_state.created": "",
+	}
+	defer d.volatileSet(clearedState)
+
+	savedState := d.volatileGet()
+	hostDev := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+
+	return networkRestorePhysicalNic(hostDev, savedState)
+}
diff --git a/test/suites/container_devices_nic_physical.sh b/test/suites/container_devices_nic_physical.sh
index 9f632c7202..ab05624b2b 100644
--- a/test/suites/container_devices_nic_physical.sh
+++ b/test/suites/container_devices_nic_physical.sh
@@ -9,6 +9,9 @@ test_container_devices_nic_physical() {
# Create dummy interface for use as parent.
ip link add "${ctName}" address "${dummyMAC}" type dummy
+ # Record how many nics we started with.
+ startNicCount=$(find /sys/class/net | wc -l)
+
# Create test container from default profile.
lxc init testimage "${ctName}"
@@ -182,6 +185,13 @@ test_container_devices_nic_physical() {
lxc delete "${ctName}"
+ # Check we haven't left any NICS lying around.
+ endNicCount=$(find /sys/class/net | wc -l)
+ if [ "$startNicCount" != "$endNicCount" ]; then
+ echo "leftover NICS detected"
+ false
+ fi
+
# Remove dummy interface (should still exist).
ip link delete "${ctName}"
}
From 624bb9ee7b062c3c49d4c626556ec9bf03ccecb2 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:13:09 +0100
Subject: [PATCH 21/32] networks/utils: Removes functions that are moved to
device implementations
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/networks_utils.go | 571 ------------------------------------------
1 file changed, 571 deletions(-)
diff --git a/lxd/networks_utils.go b/lxd/networks_utils.go
index bdd39284ff..dec09fb01c 100644
--- a/lxd/networks_utils.go
+++ b/lxd/networks_utils.go
@@ -13,16 +13,12 @@ import (
"net"
"os"
"os/exec"
- "path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"time"
- "github.com/google/gopacket"
- "github.com/google/gopacket/layers"
- "github.com/mdlayher/eui64"
"golang.org/x/sys/unix"
"github.com/lxc/lxd/lxd/cluster"
@@ -727,226 +723,6 @@ func networkKillForkDNS(name string) error {
return nil
}
-// dhcpRange represents a range of IPs from start to end.
-type dhcpRange struct {
- Start net.IP
- End net.IP
-}
-
-// networkDHCPv6Ranges returns a parsed set of DHCPv6 ranges for a particular network.
-func networkDHCPv6Ranges(netConfig map[string]string) []dhcpRange {
- dhcpRanges := make([]dhcpRange, 0)
- if netConfig["ipv6.dhcp.ranges"] != "" {
- for _, r := range strings.Split(netConfig["ipv6.dhcp.ranges"], ",") {
- parts := strings.SplitN(strings.TrimSpace(r), "-", 2)
- if len(parts) == 2 {
- startIP := net.ParseIP(parts[0])
- endIP := net.ParseIP(parts[1])
- dhcpRanges = append(dhcpRanges, dhcpRange{
- Start: startIP.To16(),
- End: endIP.To16(),
- })
- }
- }
- }
-
- return dhcpRanges
-}
-
-// networkDHCPv4Ranges returns a parsed set of DHCPv4 ranges for a particular network.
-func networkDHCPv4Ranges(netConfig map[string]string) []dhcpRange {
- dhcpRanges := make([]dhcpRange, 0)
- if netConfig["ipv4.dhcp.ranges"] != "" {
- for _, r := range strings.Split(netConfig["ipv4.dhcp.ranges"], ",") {
- parts := strings.SplitN(strings.TrimSpace(r), "-", 2)
- if len(parts) == 2 {
- startIP := net.ParseIP(parts[0])
- endIP := net.ParseIP(parts[1])
- dhcpRanges = append(dhcpRanges, dhcpRange{
- Start: startIP.To4(),
- End: endIP.To4(),
- })
- }
- }
- }
-
- return dhcpRanges
-}
-
-// networkDHCPValidIP returns whether an IP fits inside one of the supplied DHCP ranges and subnet.
-func networkDHCPValidIP(subnet *net.IPNet, ranges []dhcpRange, IP net.IP) bool {
- inSubnet := subnet.Contains(IP)
- if !inSubnet {
- return false
- }
-
- if len(ranges) > 0 {
- for _, IPRange := range ranges {
- if bytes.Compare(IP, IPRange.Start) >= 0 && bytes.Compare(IP, IPRange.End) <= 0 {
- return true
- }
- }
- } else if inSubnet {
- return true
- }
-
- return false
-}
-
-// networkDHCPFindFreeIPv6 attempts to find a free IPv6 address for the device.
-// It first checks whether there is an existing allocation for the container. Due to the limitations
-// of dnsmasq lease file format, we can only search for previous static allocations.
-// If no previous allocation, then if SLAAC (stateless) mode is enabled on the network, or if
-// DHCPv6 stateful mode is enabled without custom ranges, then an EUI64 IP is generated from the
-// device's MAC address. Finally if stateful custom ranges are enabled, then a free IP is picked
-// from the ranges configured.
-func networkDHCPFindFreeIPv6(usedIPs map[[16]byte]dnsmasq.DHCPAllocation, netConfig map[string]string, ctName string, deviceMAC string) (net.IP, error) {
- lxdIP, subnet, err := net.ParseCIDR(netConfig["ipv6.address"])
- if err != nil {
- return nil, err
- }
-
- dhcpRanges := networkDHCPv6Ranges(netConfig)
-
- // Lets see if there is already an allocation for our device and that it sits within subnet.
- // Because of dnsmasq's lease file format we can only match safely against static
- // allocations using container name. If there are custom DHCP ranges defined, check also
- // that the IP falls within one of the ranges.
- for _, DHCP := range usedIPs {
- if ctName == DHCP.Name && networkDHCPValidIP(subnet, dhcpRanges, DHCP.IP) {
- return DHCP.IP, nil
- }
- }
-
- // Try using an EUI64 IP when in either SLAAC or DHCPv6 stateful mode without custom ranges.
- if !shared.IsTrue(netConfig["ipv6.dhcp.stateful"]) || netConfig["ipv6.dhcp.ranges"] == "" {
- MAC, err := net.ParseMAC(deviceMAC)
- if err != nil {
- return nil, err
- }
-
- IP, err := eui64.ParseMAC(subnet.IP, MAC)
- if err != nil {
- return nil, err
- }
-
- // Check IP is not already allocated and not the LXD IP.
- var IPKey [16]byte
- copy(IPKey[:], IP.To16())
- _, inUse := usedIPs[IPKey]
- if !inUse && !IP.Equal(lxdIP) {
- return IP, nil
- }
- }
-
- // If no custom ranges defined, convert subnet pool to a range.
- if len(dhcpRanges) <= 0 {
- dhcpRanges = append(dhcpRanges, dhcpRange{Start: networkGetIP(subnet, 1).To16(), End: networkGetIP(subnet, -1).To16()})
- }
-
- // If we get here, then someone already has our SLAAC IP, or we are using custom ranges.
- // Try and find a free one in the subnet pool/ranges.
- for _, IPRange := range dhcpRanges {
- inc := big.NewInt(1)
- startBig := big.NewInt(0)
- startBig.SetBytes(IPRange.Start)
- endBig := big.NewInt(0)
- endBig.SetBytes(IPRange.End)
-
- for {
- if startBig.Cmp(endBig) >= 0 {
- break
- }
-
- IP := net.IP(startBig.Bytes())
-
- // Check IP generated is not LXD's IP.
- if IP.Equal(lxdIP) {
- startBig.Add(startBig, inc)
- continue
- }
-
- // Check IP is not already allocated.
- var IPKey [16]byte
- copy(IPKey[:], IP.To16())
- if _, inUse := usedIPs[IPKey]; inUse {
- startBig.Add(startBig, inc)
- continue
- }
-
- // Used by networkUpdateStatic temporarily to build new static allocations.
- return IP, nil
- }
- }
-
- return nil, fmt.Errorf("No available IP could not be found")
-}
-
-// networkDHCPFindFreeIPv4 attempts to find a free IPv4 address for the device.
-// It first checks whether there is an existing allocation for the container.
-// If no previous allocation, then a free IP is picked from the ranges configured.
-func networkDHCPFindFreeIPv4(usedIPs map[[4]byte]dnsmasq.DHCPAllocation, netConfig map[string]string, ctName string, deviceMAC string) (net.IP, error) {
- MAC, err := net.ParseMAC(deviceMAC)
- if err != nil {
- return nil, err
- }
-
- lxdIP, subnet, err := net.ParseCIDR(netConfig["ipv4.address"])
- if err != nil {
- return nil, err
- }
-
- dhcpRanges := networkDHCPv4Ranges(netConfig)
-
- // Lets see if there is already an allocation for our device and that it sits within subnet.
- // If there are custom DHCP ranges defined, check also that the IP falls within one of the ranges.
- for _, DHCP := range usedIPs {
- if (ctName == DHCP.Name || bytes.Compare(MAC, DHCP.MAC) == 0) && networkDHCPValidIP(subnet, dhcpRanges, DHCP.IP) {
- return DHCP.IP, nil
- }
- }
-
- // If no custom ranges defined, convert subnet pool to a range.
- if len(dhcpRanges) <= 0 {
- dhcpRanges = append(dhcpRanges, dhcpRange{Start: networkGetIP(subnet, 1).To4(), End: networkGetIP(subnet, -2).To4()})
- }
-
- // If no valid existing allocation found, try and find a free one in the subnet pool/ranges.
- for _, IPRange := range dhcpRanges {
- inc := big.NewInt(1)
- startBig := big.NewInt(0)
- startBig.SetBytes(IPRange.Start)
- endBig := big.NewInt(0)
- endBig.SetBytes(IPRange.End)
-
- for {
- if startBig.Cmp(endBig) >= 0 {
- break
- }
-
- IP := net.IP(startBig.Bytes())
-
- // Check IP generated is not LXD's IP.
- if IP.Equal(lxdIP) {
- startBig.Add(startBig, inc)
- continue
- }
-
- // Check IP is not already allocated.
- var IPKey [4]byte
- copy(IPKey[:], IP.To4())
- if _, inUse := usedIPs[IPKey]; inUse {
- startBig.Add(startBig, inc)
- continue
- }
-
- return IP, nil
- }
- }
-
- return nil, fmt.Errorf("No available IP could not be found")
-}
-
func networkUpdateStatic(s *state.State, networkName string) error {
// We don't want to race with ourselves here
dnsmasq.ConfigMutex.Lock()
@@ -1203,118 +979,6 @@ func networkGetMacSlice(hwaddr string) []string {
return buf
}
-const (
- clearLeaseAll = iota
- clearLeaseIPv4Only
- clearLeaseIPv6Only
-)
-
-func networkClearLease(name string, network string, hwaddr string, mode int) error {
- leaseFile := shared.VarPath("networks", network, "dnsmasq.leases")
-
- // Check that we are in fact running a dnsmasq for the network
- if !shared.PathExists(leaseFile) {
- return nil
- }
-
- // Convert MAC string to bytes to avoid any case comparison issues later.
- srcMAC, err := net.ParseMAC(hwaddr)
- if err != nil {
- return err
- }
-
- iface, err := net.InterfaceByName(network)
- if err != nil {
- return err
- }
-
- // Get IPv4 and IPv6 address of interface running dnsmasq on host.
- addrs, err := iface.Addrs()
- if err != nil {
- return err
- }
-
- var dstIPv4, dstIPv6 net.IP
- for _, addr := range addrs {
- ip, _, err := net.ParseCIDR(addr.String())
- if err != nil {
- return err
- }
- if !ip.IsGlobalUnicast() {
- continue
- }
- if ip.To4() == nil {
- dstIPv6 = ip
- } else {
- dstIPv4 = ip
- }
- }
-
- // Iterate the dnsmasq leases file looking for matching leases for this container to release.
- file, err := os.Open(leaseFile)
- if err != nil {
- return err
- }
- defer file.Close()
-
- var dstDUID string
- scanner := bufio.NewScanner(file)
- for scanner.Scan() {
- fields := strings.Fields(scanner.Text())
- fieldsLen := len(fields)
-
- // Handle lease lines
- if fieldsLen == 5 {
- if (mode == clearLeaseAll || mode == clearLeaseIPv4Only) && srcMAC.String() == fields[1] { // Handle IPv4 leases by matching MAC address to lease.
- srcIP := net.ParseIP(fields[2])
-
- if dstIPv4 == nil {
- logger.Errorf("Failed to release DHCPv4 lease for container \"%s\", IP \"%s\", MAC \"%s\", %v", name, srcIP, srcMAC, "No server address found")
- continue
- }
-
- err = networkDHCPv4Release(srcMAC, srcIP, dstIPv4)
- if err != nil {
- logger.Errorf("Failed to release DHCPv4 lease for container \"%s\", IP \"%s\", MAC \"%s\", %v", name, srcIP, srcMAC, err)
- }
- } else if (mode == clearLeaseAll || mode == clearLeaseIPv6Only) && name == fields[3] { // Handle IPv6 addresses by matching hostname to lease.
- IAID := fields[1]
- srcIP := net.ParseIP(fields[2])
- DUID := fields[4]
-
- // Skip IPv4 addresses.
- if srcIP.To4() != nil {
- continue
- }
-
- if dstIPv6 == nil {
- logger.Errorf("Failed to release DHCPv6 lease for container \"%s\", IP \"%s\", DUID \"%s\", IAID \"%s\": %s", name, srcIP, DUID, IAID, "No server address found")
- continue // Cant send release packet if no dstIP found.
- }
-
- if dstDUID == "" {
- logger.Errorf("Failed to release DHCPv6 lease for container \"%s\", IP \"%s\", DUID \"%s\", IAID \"%s\": %s", name, srcIP, DUID, IAID, "No server DUID found")
- continue // Cant send release packet if no dstDUID found.
- }
-
- err = networkDHCPv6Release(DUID, IAID, srcIP, dstIPv6, dstDUID)
- if err != nil {
- logger.Errorf("Failed to release DHCPv6 lease for container \"%s\", IP \"%s\", DUID \"%s\", IAID \"%s\": %v", name, srcIP, DUID, IAID, err)
- }
- }
- } else if fieldsLen == 2 && fields[0] == "duid" {
- // Handle server DUID line needed for releasing IPv6 leases.
- // This should come before the IPv6 leases in the lease file.
- dstDUID = fields[1]
- }
- }
- if err := scanner.Err(); err != nil {
- return err
- }
-
- return nil
-}
-
func networkGetState(netIf net.Interface) api.NetworkState {
netState := "down"
netType := "unknown"
@@ -1453,238 +1117,3 @@ func networkApplyBootRoutesV6(devName string, routes []string) error {
return nil
}
-
-// virtFuncInfo holds information about SR-IOV virtual functions.
-type virtFuncInfo struct {
- mac string
- vlan int
- spoofcheck bool
-}
-
-// networkGetVirtFuncInfo returns info about an SR-IOV virtual function from the ip tool.
-func networkGetVirtFuncInfo(devName string, vfID int) (vf virtFuncInfo, err error) {
- cmd := exec.Command("ip", "link", "show", devName)
- stdout, err := cmd.StdoutPipe()
- if err != nil {
- return
- }
- if err = cmd.Start(); err != nil {
- return
- }
- defer stdout.Close()
-
- // Try and match: "vf 1 MAC 00:00:00:00:00:00, vlan 4095, spoof checking off"
- reVlan := regexp.MustCompile(fmt.Sprintf(`vf %d MAC ((?:[[:xdigit:]]{2}:){5}[[:xdigit:]]{2}).*, vlan (\d+), spoof checking (\w+)`, vfID))
-
- // IP link command doesn't show the vlan property if its set to 0, so we need to detect that.
- // Try and match: "vf 1 MAC 00:00:00:00:00:00, spoof checking off"
- reNoVlan := regexp.MustCompile(fmt.Sprintf(`vf %d MAC ((?:[[:xdigit:]]{2}:){5}[[:xdigit:]]{2}).*, spoof checking (\w+)`, vfID))
- scanner := bufio.NewScanner(stdout)
- for scanner.Scan() {
- // First try and find VF and reads its properties with VLAN activated.
- res := reVlan.FindStringSubmatch(scanner.Text())
- if len(res) == 4 {
- vlan, err := strconv.Atoi(res[2])
- if err != nil {
- return vf, err
- }
-
- vf.mac = res[1]
- vf.vlan = vlan
- vf.spoofcheck = shared.IsTrue(res[3])
- return vf, err
- }
-
- // Next try and find VF and reads its properties with VLAN missing.
- res = reNoVlan.FindStringSubmatch(scanner.Text())
- if len(res) == 3 {
- vf.mac = res[1]
- vf.vlan = 0 // Missing VLAN ID means 0 when resetting later.
- vf.spoofcheck = shared.IsTrue(res[2])
- return vf, err
- }
- }
- if err = scanner.Err(); err != nil {
- return
- }
-
- return vf, fmt.Errorf("no matching virtual function found")
-}
-
-// networkGetVFDevicePCISlot returns the PCI slot name for a network virtual function device.
-func networkGetVFDevicePCISlot(parentName string, vfID string) (string, error) {
- file, err := os.Open(fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", parentName, vfID))
- if err != nil {
- return "", err
- }
- defer file.Close()
-
- scanner := bufio.NewScanner(file)
- for scanner.Scan() {
- // Looking for something like this "PCI_SLOT_NAME=0000:05:10.0"
- fields := strings.SplitN(scanner.Text(), "=", 2)
- if len(fields) == 2 && fields[0] == "PCI_SLOT_NAME" {
- return fields[1], nil
- }
- }
- if err := scanner.Err(); err != nil {
- return "", err
- }
-
- return "", fmt.Errorf("PCI_SLOT_NAME not found")
-}
-
-// networkGetVFDeviceDriverPath returns the path to the network virtual function device driver in /sys.
-func networkGetVFDeviceDriverPath(parentName string, vfID string) (string, error) {
- return filepath.EvalSymlinks(fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/driver", parentName, vfID))
-}
-
-// networkDeviceUnbind unbinds a network device from the OS using its PCI Slot Name and driver path.
-func networkDeviceUnbind(pciSlotName string, driverPath string) error {
- return ioutil.WriteFile(fmt.Sprintf("%s/unbind", driverPath), []byte(pciSlotName), 0600)
-}
-
-// networkDeviceUnbind binds a network device to the OS using its PCI Slot Name and driver path.
-func networkDeviceBind(pciSlotName string, driverPath string) error {
- return ioutil.WriteFile(fmt.Sprintf("%s/bind", driverPath), []byte(pciSlotName), 0600)
-}
-
-// networkDeviceBindWait waits for network interface to appear after being binded.
-func networkDeviceBindWait(devName string) error {
- for i := 0; i < 10; i++ {
- if shared.PathExists(fmt.Sprintf("/sys/class/net/%s", devName)) {
- return nil
- }
-
- time.Sleep(50 * time.Millisecond)
- }
-
- return fmt.Errorf("Bind of interface \"%s\" took too long", devName)
-}
-
-// networkDHCPv4Release sends a DHCPv4 release packet to a DHCP server.
-func networkDHCPv4Release(srcMAC net.HardwareAddr, srcIP net.IP, dstIP net.IP) error {
- dstAddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:67", dstIP.String()))
- if err != nil {
- return err
- }
- conn, err := net.DialUDP("udp", nil, dstAddr)
- if err != nil {
- return err
- }
- defer conn.Close()
-
- //Random DHCP transaction ID
- xid := rand.Uint32()
-
- // Construct a DHCP packet pretending to be from the source IP and MAC supplied.
- dhcp := layers.DHCPv4{
- Operation: layers.DHCPOpRequest,
- HardwareType: layers.LinkTypeEthernet,
- ClientHWAddr: srcMAC,
- ClientIP: srcIP,
- Xid: xid,
- }
-
- // Add options to DHCP release packet.
- dhcp.Options = append(dhcp.Options,
- layers.NewDHCPOption(layers.DHCPOptMessageType, []byte{byte(layers.DHCPMsgTypeRelease)}),
- layers.NewDHCPOption(layers.DHCPOptServerID, dstIP.To4()),
- )
-
- buf := gopacket.NewSerializeBuffer()
- opts := gopacket.SerializeOptions{
- ComputeChecksums: true,
- FixLengths: true,
- }
-
- err = gopacket.SerializeLayers(buf, opts, &dhcp)
- if err != nil {
- return err
- }
-
- _, err = conn.Write(buf.Bytes())
- return err
-}
-
-// networkDHCPv6Release sends a DHCPv6 release packet to a DHCP server.
-func networkDHCPv6Release(srcDUID string, srcIAID string, srcIP net.IP, dstIP net.IP, dstDUID string) error {
- dstAddr, err := net.ResolveUDPAddr("udp6", fmt.Sprintf("[%s]:547", dstIP.String()))
- if err != nil {
- return err
- }
- conn, err := net.DialUDP("udp6", nil, dstAddr)
- if err != nil {
- return err
- }
- defer conn.Close()
-
- // Construct a DHCPv6 packet pretending to be from the source IP and MAC supplied.
- dhcp := layers.DHCPv6{
- MsgType: layers.DHCPv6MsgTypeRelease,
- }
-
- // Convert Server DUID from string to byte array
- dstDUIDRaw, err := hex.DecodeString(strings.Replace(dstDUID, ":", "", -1))
- if err != nil {
- return err
- }
-
- // Convert DUID from string to byte array
- srcDUIDRaw, err := hex.DecodeString(strings.Replace(srcDUID, ":", "", -1))
- if err != nil {
- return err
- }
-
- // Convert IAID string to int
- srcIAIDRaw, err := strconv.Atoi(srcIAID)
- if err != nil {
- return err
- }
-
- // Build the Identity Association details option manually (as not provided by gopacket).
- iaAddr := networkDHCPv6CreateIAAddress(srcIP)
- ianaRaw := networkDHCPv6CreateIANA(srcIAIDRaw, iaAddr)
-
- // Add options to DHCP release packet.
- dhcp.Options = append(dhcp.Options,
- layers.NewDHCPv6Option(layers.DHCPv6OptServerID, dstDUIDRaw),
- layers.NewDHCPv6Option(layers.DHCPv6OptClientID, srcDUIDRaw),
- layers.NewDHCPv6Option(layers.DHCPv6OptIANA, ianaRaw),
- )
-
- buf := gopacket.NewSerializeBuffer()
- opts := gopacket.SerializeOptions{
- ComputeChecksums: true,
- FixLengths: true,
- }
-
- err = gopacket.SerializeLayers(buf, opts, &dhcp)
- if err != nil {
- return err
- }
-
- _, err = conn.Write(buf.Bytes())
- return err
-}
-
-// networkDHCPv6CreateIANA creates a DHCPv6 Identity Association for Non-temporary Address (rfc3315 IA_NA) option.
-func networkDHCPv6CreateIANA(IAID int, IAAddr []byte) []byte {
- data := make([]byte, 12)
- binary.BigEndian.PutUint32(data[0:4], uint32(IAID)) // Identity Association Identifier
- binary.BigEndian.PutUint32(data[4:8], uint32(0)) // T1
- binary.BigEndian.PutUint32(data[8:12], uint32(0)) // T2
- data = append(data, IAAddr...) // Append the IA Address details
- return data
-}
-
-// networkDHCPv6CreateIAAddress creates a DHCPv6 Identity Association Address (rfc3315) option.
-func networkDHCPv6CreateIAAddress(IP net.IP) []byte {
- data := make([]byte, 28)
- binary.BigEndian.PutUint16(data[0:2], uint16(layers.DHCPv6OptIAAddr)) // Sub-Option type
- binary.BigEndian.PutUint16(data[2:4], uint16(24)) // Length (fixed at 24 bytes)
- copy(data[4:20], IP) // IPv6 address to be released
- binary.BigEndian.PutUint32(data[20:24], uint32(0)) // Preferred liftetime
- binary.BigEndian.PutUint32(data[24:28], uint32(0)) // Valid lifetime
- return data
-}
From c4e08253403f61ef7dad9f590339b7bd694d75cd Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 22 Jul 2019 10:13:40 +0100
Subject: [PATCH 22/32] container/lxc: Links device interface into LXD, removes
unused functions
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container_lxc.go | 2087 +++++++-----------------------------------
1 file changed, 341 insertions(+), 1746 deletions(-)
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 1a1d2db062..715701f89f 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2,8 +2,6 @@ package main
import (
"bufio"
- "bytes"
- "encoding/hex"
"encoding/json"
"fmt"
"io"
@@ -33,7 +31,6 @@ import (
"github.com/lxc/lxd/lxd/db/query"
"github.com/lxc/lxd/lxd/device"
"github.com/lxc/lxd/lxd/device/config"
- "github.com/lxc/lxd/lxd/dnsmasq"
"github.com/lxc/lxd/lxd/instance"
"github.com/lxc/lxd/lxd/iptables"
"github.com/lxc/lxd/lxd/maas"
@@ -500,18 +497,27 @@ func containerLXCCreate(s *state.State, args db.ContainerArgs) (container, error
return nil, err
}
- // Update MAAS
if !c.IsSnapshot() {
+ // Update MAAS
err = c.maasUpdate(false)
if err != nil {
c.Delete()
logger.Error("Failed creating container", ctxMap)
return nil, err
}
- }
- // Update lease files
- networkUpdateStatic(s, "")
+ // Add NIC devices to container.
+ for k, m := range c.expandedDevices {
+ if m["type"] != "nic" {
+ continue
+ }
+
+ err = c.deviceAdd(k, m)
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
logger.Info("Created container", ctxMap)
eventSendLifecycle(c.project, "container-created",
@@ -1634,7 +1640,6 @@ func (c *containerLXC) initLXC(config bool) error {
}
// Setup devices
- networkidx := 0
for _, k := range c.expandedDevices.DeviceNames() {
m := c.expandedDevices[k]
if shared.StringInSlice(m["type"], []string{"unix-char", "unix-block"}) {
@@ -1666,115 +1671,6 @@ func (c *containerLXC) initLXC(config bool) error {
if err != nil {
return err
}
- } else if m["type"] == "nic" || m["type"] == "infiniband" {
- // Fill in some fields from volatile
- m, err = c.fillNetworkDevice(k, m)
- if err != nil {
- return err
- }
-
- networkKeyPrefix := "lxc.net"
- if !util.RuntimeLiblxcVersionAtLeast(2, 1, 0) {
- networkKeyPrefix = "lxc.network"
- }
-
- // Interface type specific configuration
- if shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.type", networkKeyPrefix, networkidx), "veth")
- if err != nil {
- return err
- }
- } else if m["nictype"] == "physical" || m["nictype"] == "sriov" {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.type", networkKeyPrefix, networkidx), "phys")
- if err != nil {
- return err
- }
- } else if m["nictype"] == "macvlan" {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.type", networkKeyPrefix, networkidx), "macvlan")
- if err != nil {
- return err
- }
-
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.macvlan.mode", networkKeyPrefix, networkidx), "bridge")
- if err != nil {
- return err
- }
- } else if m["nictype"] == "ipvlan" {
- err = c.initLXCIPVLAN(cc, networkKeyPrefix, networkidx, m)
- if err != nil {
- return err
- }
- }
-
- // Run network up hook for bridged and p2p nics.
- if shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.script.up", networkKeyPrefix, networkidx), fmt.Sprintf("/proc/%d/exe callhook %s %d network-up %s", os.Getpid(), shared.VarPath(""), c.id, k))
- if err != nil {
- return err
- }
- }
-
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.flags", networkKeyPrefix, networkidx), "up")
- if err != nil {
- return err
- }
-
- if m["nictype"] == "bridged" {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.link", networkKeyPrefix, networkidx), m["parent"])
- if err != nil {
- return err
- }
- } else if m["nictype"] == "sriov" {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.link", networkKeyPrefix, networkidx), m["host_name"])
- if err != nil {
- return err
- }
- } else if shared.StringInSlice(m["nictype"], []string{"macvlan", "ipvlan", "physical"}) {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.link", networkKeyPrefix, networkidx), device.NetworkGetHostDevice(m["parent"], m["vlan"]))
- if err != nil {
- return err
- }
- }
-
- // Host Virtual NIC name
- vethName := ""
- if m["host_name"] != "" && shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- vethName = m["host_name"]
- }
-
- if vethName != "" {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.veth.pair", networkKeyPrefix, networkidx), vethName)
- if err != nil {
- return err
- }
- }
-
- // MAC address
- if m["hwaddr"] != "" {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.hwaddr", networkKeyPrefix, networkidx), m["hwaddr"])
- if err != nil {
- return err
- }
- }
-
- // MTU
- if m["mtu"] != "" {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.mtu", networkKeyPrefix, networkidx), m["mtu"])
- if err != nil {
- return err
- }
- }
-
- // Name
- if m["name"] != "" {
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.name", networkKeyPrefix, networkidx), m["name"])
- if err != nil {
- return err
- }
- }
-
- // bump network index
- networkidx++
} else if m["type"] == "disk" {
isRootfs := shared.IsRootDiskDevice(m)
@@ -1929,95 +1825,214 @@ func (c *containerLXC) initLXC(config bool) error {
return nil
}
-// initLXCIPVLAN runs as part of initLXC function and initialises liblxc with the IPVLAN config.
-func (c *containerLXC) initLXCIPVLAN(cc *lxc.Container, networkKeyPrefix string, networkidx int, m map[string]string) error {
- err := c.checkIPVLANSupport()
+// deviceLoad instantiates and validates a new device and returns it along with enriched config.
+func (c *containerLXC) deviceLoad(deviceName string, rawConfig map[string]string) (device.Device, map[string]string, error) {
+ // Fill in some fields from volatile.
+ filledConfig, err := c.fillNetworkDevice(deviceName, rawConfig)
if err != nil {
- return err
+ return nil, nil, err
}
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.type", networkKeyPrefix, networkidx), "ipvlan")
+ d, err := device.New(c, c.state, filledConfig, c.deviceVolatileGetFunc(deviceName), c.deviceVolatileSetFunc(deviceName))
if err != nil {
- return err
+ return nil, nil, err
}
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.ipvlan.mode", networkKeyPrefix, networkidx), "l3s")
+ return d, filledConfig, nil
+}
+
+// deviceAdd loads a new device and calls its Add() function.
+func (c *containerLXC) deviceAdd(deviceName string, rawConfig map[string]string) error {
+ d, _, err := c.deviceLoad(deviceName, rawConfig)
if err != nil {
return err
}
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.ipvlan.isolation", networkKeyPrefix, networkidx), "bridge")
+ return d.Add()
+}
+
+// deviceStart loads a new device and calls its Start() function.
+func (c *containerLXC) deviceStart(deviceName string, rawConfig map[string]string, isRunning bool) (*device.RunConfig, error) {
+ d, filledConfig, err := c.deviceLoad(deviceName, rawConfig)
if err != nil {
- return err
+ return nil, err
+ }
+
+ if filledConfig["parent"] != "" && !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", filledConfig["parent"])) {
+ return nil, fmt.Errorf("Parent device '%s' doesn't exist", filledConfig["parent"])
+ }
+
+ if canHotPlug, _ := d.CanHotPlug(); isRunning && !canHotPlug {
+ return nil, fmt.Errorf("Device cannot be started when container is running")
}
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.l2proxy", networkKeyPrefix, networkidx), "1")
+ runConfig, err := d.Start()
if err != nil {
- return err
+ return nil, err
}
- if m["ipv4.address"] != "" {
- //Check necessary sysctls are configured for use with l2proxy parent in IPVLAN l3s mode.
- ipv4FwdPath := fmt.Sprintf("ipv4/conf/%s/forwarding", m["parent"])
- sysctlVal, err := device.NetworkSysctlGet(ipv4FwdPath)
- if err != nil {
- return errors.Wrapf(err, "Error reading net sysctl %s", ipv4FwdPath)
+ // If container is running, perform live attach of interface to container.
+ if isRunning {
+ devName := ""
+ for _, dev := range runConfig.NetworkInterfaces[0] {
+ if dev.Key == "link" {
+ devName = dev.Value
+ break
+ }
}
- if sysctlVal != "1\n" {
- return fmt.Errorf("IPVLAN in L3S mode requires sysctl net.ipv4.conf.%s.forwarding=1", m["parent"])
+
+ if devName == "" {
+ return nil, fmt.Errorf("Device didn't provide a link property to use")
}
- for _, addr := range strings.Split(m["ipv4.address"], ",") {
- addr = strings.TrimSpace(addr)
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.ipv4.address", networkKeyPrefix, networkidx), fmt.Sprintf("%s/32", addr))
- if err != nil {
- return err
- }
+ // Load the go-lxc struct.
+ err = c.initLXC(false)
+ if err != nil {
+ return nil, err
}
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.ipv4.gateway", networkKeyPrefix, networkidx), "dev")
+ // Add the interface to the container.
+ err = c.c.AttachInterface(devName, filledConfig["name"])
if err != nil {
- return err
+ return nil, fmt.Errorf("Failed to attach interface: %s to %s: %s", devName, filledConfig["name"], err)
}
}
- if m["ipv6.address"] != "" {
- //Check necessary sysctls are configured for use with l2proxy parent in IPVLAN l3s mode.
- ipv6FwdPath := fmt.Sprintf("ipv6/conf/%s/forwarding", m["parent"])
- sysctlVal, err := device.NetworkSysctlGet(ipv6FwdPath)
+ return runConfig, nil
+}
+
+// deviceUpdate loads a new device and calls its Update() function.
+func (c *containerLXC) deviceUpdate(deviceName string, rawConfig map[string]string, oldConfig map[string]string, isRunning bool) error {
+ d, _, err := c.deviceLoad(deviceName, rawConfig)
+ if err != nil {
+ return err
+ }
+
+ err = d.Update(oldConfig, isRunning)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// deviceStop loads a new device and calls its Stop() function.
+func (c *containerLXC) deviceStop(deviceName string, rawConfig map[string]string, stopHookNetnsPath string) error {
+ d, filledConfg, err := c.deviceLoad(deviceName, rawConfig)
+ if err != nil {
+ return err
+ }
+
+ if canHotPlug, _ := d.CanHotPlug(); stopHookNetnsPath == "" && !canHotPlug {
+ return fmt.Errorf("Device cannot be stopped when container is running")
+ }
+
+ hostName := c.localConfig[fmt.Sprintf("volatile.%s.host_name", deviceName)]
+ hostNameExists := hostName != "" && shared.PathExists(fmt.Sprintf("/sys/class/net/%s", hostName))
+
+ // If container is running, perform live detach of interface back to host.
+ if stopHookNetnsPath == "" {
+ // For some reason, having network config confuses detach, so get our own go-lxc struct.
+ cname := project.Prefix(c.Project(), c.Name())
+ cc, err := lxc.NewContainer(cname, c.state.OS.LxcPath)
if err != nil {
- return errors.Wrapf(err, "Error reading net sysctl %s", ipv6FwdPath)
- }
- if sysctlVal != "1\n" {
- return fmt.Errorf("IPVLAN in L3S mode requires sysctl net.ipv6.conf.%s.forwarding=1", m["parent"])
+ return err
}
+ defer cc.Release()
- ipv6ProxyNdpPath := fmt.Sprintf("ipv6/conf/%s/proxy_ndp", m["parent"])
- sysctlVal, err = device.NetworkSysctlGet(ipv6ProxyNdpPath)
+ // Get interfaces inside container.
+ ifaces, err := cc.Interfaces()
if err != nil {
- return errors.Wrapf(err, "Error reading net sysctl %s", ipv6ProxyNdpPath)
- }
- if sysctlVal != "1\n" {
- return fmt.Errorf("IPVLAN in L3S mode requires sysctl net.ipv6.conf.%s.proxy_ndp=1", m["parent"])
+ return fmt.Errorf("Failed to list network interfaces: %v", err)
}
- for _, addr := range strings.Split(m["ipv6.address"], ",") {
- addr = strings.TrimSpace(addr)
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.ipv6.address", networkKeyPrefix, networkidx), fmt.Sprintf("%s/128", addr))
+ // Remove the interface from the container if it exists inside container.
+ if shared.StringInSlice(filledConfg["name"], ifaces) {
+ detachHostName := hostName
+
+ // If the host_name is empty or already exists, we need to detach to a
+ // random device name, so generate one here.
+ if hostName == "" || hostNameExists {
+ detachHostName = device.NetworkRandomDevName("lxd")
+ }
+
+ err = cc.DetachInterfaceRename(filledConfg["name"], detachHostName)
if err != nil {
- return err
+ return errors.Wrapf(err, "Failed to detach interface: %s to %s", filledConfg["name"], detachHostName)
}
- }
- err = lxcSetConfigItem(cc, fmt.Sprintf("%s.%d.ipv6.gateway", networkKeyPrefix, networkidx), "dev")
- if err != nil {
- return err
+ // If we have detached the device to a random host_name it is our
+ // responsibility to delete the device as there is no other record of this.
+ if hostName == "" || hostNameExists {
+ // Attempt to remove device, but don't return on failure as Stop()
+ // still needs to be called.
+ err := device.NetworkRemoveInterface(detachHostName)
+ if err != nil {
+ logger.Errorf("Error removing interface: %s: %v", detachHostName, err)
+ }
+ }
+ }
+ } else {
+ // Currently liblxc does not move devices back to the host on stop that were added
+ // after the container was started. For this reason we utilise the lxc.hook.stop
+ // hook so that we can capture the netns path, enter the namespace and move the nics
+ // back to the host and rename them if liblxc hasn't already done it.
+ // We can only move back devices that have an expected host_name record and where
+ // that device doesn't already exist on the host.
+ if hostName != "" && !hostNameExists {
+ err := c.detachInterfaceRename(stopHookNetnsPath, filledConfg["name"], hostName)
+ if err != nil {
+ return errors.Wrapf(err, "Failed to detach interface: %s to %s", filledConfg["name"], hostName)
+ }
}
}
+ err = d.Stop()
+ if err != nil {
+ return err
+ }
+
return nil
}
+// deviceRemove loads a new device and calls its Remove() function.
+func (c *containerLXC) deviceRemove(deviceName string, rawConfig map[string]string) error {
+ d, _, err := c.deviceLoad(deviceName, rawConfig)
+ if err != nil {
+ return err
+ }
+
+ return d.Remove()
+}
+
+// deviceVolatileGetFunc returns a function that retrieves a named device's volatile config and
+// removes its device prefix from the keys.
+func (c *containerLXC) deviceVolatileGetFunc(devName string) func() map[string]string {
+ return func() map[string]string {
+ volatile := make(map[string]string)
+ prefix := fmt.Sprintf("volatile.%s.", devName)
+ for k, v := range c.localConfig {
+ if strings.HasPrefix(k, prefix) {
+ volatile[strings.TrimPrefix(k, prefix)] = v
+ }
+ }
+ return volatile
+ }
+}
+
+// deviceVolatileSetFunc returns a function that can be called to save a named device's volatile
+// config using keys that do not have the device's name prefixed.
+func (c *containerLXC) deviceVolatileSetFunc(devName string) func(save map[string]string) error {
+ return func(save map[string]string) error {
+ volatileSave := make(map[string]string)
+ for k, v := range save {
+ volatileSave[fmt.Sprintf("volatile.%s.%s", devName, k)] = v
+ }
+
+ return c.VolatileSet(volatileSave)
+ }
+}
+
// Initialize storage interface for this container
func (c *containerLXC) initStorage() error {
if c.storage != nil {
@@ -2338,6 +2353,7 @@ func (c *containerLXC) startCommon() (string, error) {
diskDevices := map[string]config.Device{}
// Create the devices
+ nicID := -1
for _, k := range c.expandedDevices.DeviceNames() {
m := c.expandedDevices[k]
if shared.StringInSlice(m["type"], []string{"unix-char", "unix-block"}) {
@@ -2480,6 +2496,24 @@ func (c *containerLXC) startCommon() (string, error) {
return "", err
}
+ // Use new Device interface if supported.
+ nicID++
+ runConfig, err := c.deviceStart(k, m, false)
+ if err != device.ErrUnsupportedDevType {
+ if err != nil {
+ return "", err
+ }
+
+ for _, dev := range runConfig.NetworkInterfaces[0] {
+ err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.%s", networkKeyPrefix, nicID, dev.Key), dev.Value)
+ if err != nil {
+ return "", err
+ }
+ }
+
+ continue
+ }
+
networkidx := -1
reserved := []string{}
// Record nictype == physical devices since those won't
@@ -2514,19 +2548,6 @@ func (c *containerLXC) startCommon() (string, error) {
continue
}
- // Fill in some fields from volatile and setup the virtual function
- if m["nictype"] == "sriov" {
- m, err = c.fillSriovNetworkDevice(dName, m, reserved)
- if err != nil {
- return "", err
- }
-
- err = c.setupSriovParent(dName, m)
- if err != nil {
- return "", err
- }
- }
-
// Make sure that no one called dibs.
reserved = append(reserved, m["host_name"])
@@ -2567,14 +2588,6 @@ func (c *containerLXC) startCommon() (string, error) {
return "", err
}
}
-
- // Create VLAN device on parent
- if shared.StringInSlice(m["nictype"], []string{"macvlan", "ipvlan", "physical"}) {
- _, err := c.setupPhysicalParent(k, m)
- if err != nil {
- return "", err
- }
- }
}
}
@@ -2693,47 +2706,6 @@ func (c *containerLXC) snapshotPhysicalNic(deviceName string, hostName string, v
return nil
}
-// setupPhysicalParent creates a VLAN device on parent if needed and tracks original properties of
-// the physical device if not just created so they can be restored when the device is detached.
-// Returns the parent device name detected.
-func (c *containerLXC) setupPhysicalParent(deviceName string, m config.Device) (string, error) {
- if m["parent"] == "" {
- return "", errors.New("No parent property on device")
- }
-
- hostName := device.NetworkGetHostDevice(m["parent"], m["vlan"])
- createdDev, err := device.NetworkCreateVlanDeviceIfNeeded(m["parent"], hostName, m["vlan"])
- if err != nil {
- return hostName, err
- }
-
- // If we are passing the parent device into the container, we need to save properties
- // of the original device in case they are changed during their time inside the container,
- // so that we can restore those properties when the device is detached from the container.
- if m["nictype"] == "physical" {
- createdKey := "volatile." + deviceName + ".last_state.created"
- volatile := map[string]string{
- createdKey: fmt.Sprintf("%t", createdDev),
- }
-
- // If we didn't create the device we should track various properties so we can
- // restore them when the container is stopped or the device is detached.
- if createdDev == false {
- err = c.snapshotPhysicalNic(deviceName, hostName, volatile)
- if err != nil {
- return hostName, err
- }
- }
-
- err = c.VolatileSet(volatile)
- if err != nil {
- return hostName, err
- }
- }
-
- return hostName, nil
-}
-
// detachInterfaceRename enters the container's network namespace and moves the named interface
// in ifName back to the network namespace of the running process as the name specified in hostName.
func (c *containerLXC) detachInterfaceRename(netns string, ifName string, hostName string) error {
@@ -2758,371 +2730,43 @@ func (c *containerLXC) detachInterfaceRename(netns string, ifName string, hostNa
return nil
}
-// restorePhysicalNic uses the data in c.localConfig to restore physical nic properties from the
-// volatile data gathered when the device was attached.
-func (c *containerLXC) restorePhysicalNic(deviceName string, hostName string) error {
- createdKey := "volatile." + deviceName + ".last_state.created"
- mtuKey := "volatile." + deviceName + ".last_state.mtu"
- macKey := "volatile." + deviceName + ".last_state.hwaddr"
+func (c *containerLXC) Start(stateful bool) error {
+ var ctxMap log.Ctx
- // If we created the "physical" device and then it should be removed.
- if shared.IsTrue(c.localConfig[createdKey]) {
- return device.NetworkRemoveInterface(hostName)
+ // Setup a new operation
+ op, err := c.createOperation("start", false, false)
+ if err != nil {
+ return errors.Wrap(err, "Create container start operation")
}
+ defer op.Done(nil)
- // Bring the interface down, as this is sometimes needed to change settings on the nic.
- _, err := shared.RunCommand("ip", "link", "set", "dev", hostName, "down")
+ err = setupSharedMounts()
if err != nil {
- return fmt.Errorf("Failed to bring down \"%s\": %v", hostName, err)
+ return fmt.Errorf("Daemon failed to setup shared mounts base: %s.\nDoes security.nesting need to be turned on?", err)
}
- // If MTU value is specified then there is an original MTU that needs restoring.
- if c.localConfig[mtuKey] != "" {
- mtuInt, err := strconv.ParseUint(c.localConfig[mtuKey], 10, 32)
- if err != nil {
- return fmt.Errorf("Failed to convert mtu for \"%s\" mtu \"%s\": %v", hostName, c.localConfig[mtuKey], err)
- }
-
- err = device.NetworkSetDevMTU(hostName, mtuInt)
- if err != nil {
- return fmt.Errorf("Failed to restore physical dev \"%s\" mtu to \"%d\": %v", hostName, mtuInt, err)
- }
+ // Run the shared start code
+ configPath, err := c.startCommon()
+ if err != nil {
+ return errors.Wrap(err, "Common start logic")
}
- // If MAC value is specified then there is an original MAC that needs restoring.
- if c.localConfig[macKey] != "" {
- err := device.NetworkSetDevMAC(hostName, c.localConfig[macKey])
- if err != nil {
- return fmt.Errorf("Failed to restore physical dev \"%s\" mac to \"%s\": %v", hostName, c.localConfig[macKey], err)
- }
+ // Ensure that the container storage volume is mounted.
+ _, err = c.StorageStart()
+ if err != nil {
+ return errors.Wrap(err, "Storage start")
}
- return nil
-}
+ ctxMap = log.Ctx{
+ "project": c.project,
+ "name": c.name,
+ "action": op.action,
+ "created": c.creationDate,
+ "ephemeral": c.ephemeral,
+ "used": c.lastUsedDate,
+ "stateful": stateful}
-// restorePhysicalParent restores parent device settings when removed from a container using the
-// volatile data that was stored when the device was first added with setupPhysicalParent().
-func (c *containerLXC) restorePhysicalParent(deviceName string, m config.Device) {
- // Clear volatile data when function finishes.
- defer func() {
- // Volatile keys used for parent restore.
- createdKey := "volatile." + deviceName + ".last_state.created"
- mtuKey := "volatile." + deviceName + ".last_state.mtu"
- macKey := "volatile." + deviceName + ".last_state.hwaddr"
-
- err := c.VolatileSet(map[string]string{createdKey: "", mtuKey: "", macKey: ""})
- if err != nil {
- logger.Errorf("Failed to remove volatile config for %s: %v", deviceName, err)
- }
- }()
-
- // Nothing to do if we don't know the original device name.
- hostName := device.NetworkGetHostDevice(m["parent"], m["vlan"])
- if hostName == "" {
- return
- }
-
- err := c.restorePhysicalNic(deviceName, hostName)
- if err != nil {
- logger.Errorf("%v", err)
- }
-}
-
-// setupSriovParent configures a SR-IOV virtual function (VF) device on parent and tracks original
-// properties of the physical device for restoration on detach.
-func (c *containerLXC) setupSriovParent(deviceName string, m config.Device) error {
- // Check for required fields in device config.
- if m["parent"] == "" {
- return fmt.Errorf("Missing parent for 'sriov' nic '%s'", deviceName)
- }
-
- if m["vfID"] == "" {
- return fmt.Errorf("Missing vfID for 'sriov' nic '%s'", deviceName)
- }
-
- hostNameKey := "volatile." + deviceName + ".host_name"
- createdKey := "volatile." + deviceName + ".last_state.created"
- macKey := "volatile." + deviceName + ".last_state.hwaddr"
- vfIDKey := "volatile." + deviceName + ".last_state.vf.id"
- vfMacKey := "volatile." + deviceName + ".last_state.vf.hwaddr"
- vfVlanKey := "volatile." + deviceName + ".last_state.vf.vlan"
- vfSpoofCheckKey := "volatile." + deviceName + ".last_state.vf.spoofcheck"
-
- // Record properties of virtual function settings on parent.
- vfID, err := strconv.Atoi(m["vfID"])
- if err != nil {
- return err
- }
-
- vfInfo, err := networkGetVirtFuncInfo(m["parent"], vfID)
- if err != nil {
- return err
- }
-
- volatile := map[string]string{
- hostNameKey: m["host_name"],
- createdKey: "false",
- vfIDKey: m["vfID"],
- vfMacKey: vfInfo.mac,
- vfVlanKey: fmt.Sprintf("%d", vfInfo.vlan),
- vfSpoofCheckKey: fmt.Sprintf("%t", vfInfo.spoofcheck),
- }
-
- // Record properties of VF instance device.
- err = c.snapshotPhysicalNic(deviceName, m["host_name"], volatile)
- if err != nil {
- return err
- }
-
- // Get VF device's PCI Slot Name so we can unbind and rebind it from the host.
- vfPCISlot, err := networkGetVFDevicePCISlot(m["parent"], m["vfID"])
- if err != nil {
- return err
- }
-
- // Get the path to the VF device's driver now, as once it is unbound we won't be able to
- // determine its driver path in order to rebind it.
- vfDriverPath, err := networkGetVFDeviceDriverPath(m["parent"], m["vfID"])
- if err != nil {
- return err
- }
-
- // Unbind VF device from the host so that the settings will take effect when we rebind it.
- err = networkDeviceUnbind(vfPCISlot, vfDriverPath)
- if err != nil {
- return err
- }
-
- // However we return from this function, we must try to rebind the VF so its not orphaned.
- // The OS won't let an already bound device be bound again so is safe to call twice.
- defer networkDeviceBind(vfPCISlot, vfDriverPath)
-
- // Setup VF VLAN if specified.
- if m["vlan"] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", m["parent"], "vf", m["vfID"], "vlan", m["vlan"])
- if err != nil {
- return err
- }
- }
-
- // Setup VF MAC spoofing protection if specified.
- // The ordering of this section is very important, as Intel cards require a very specific
- // order of setup to allow LXD to set custom MACs when using spoof check mode.
- if shared.IsTrue(m["security.mac_filtering"]) {
- // If no MAC specified in config, use current VF interface MAC.
- mac := m["hwaddr"]
- if mac == "" {
- mac = volatile[macKey]
- }
-
- // Set MAC on VF (this combined with spoof checking prevents any other MAC being used).
- _, err = shared.RunCommand("ip", "link", "set", "dev", m["parent"], "vf", m["vfID"], "mac", mac)
- if err != nil {
- return err
- }
-
- // Now that MAC is set on VF, we can enable spoof checking.
- _, err = shared.RunCommand("ip", "link", "set", "dev", m["parent"], "vf", m["vfID"], "spoofchk", "on")
- if err != nil {
- return err
- }
- } else {
- // Reset VF to ensure no previous MAC restriction exists.
- _, err := shared.RunCommand("ip", "link", "set", "dev", m["parent"], "vf", m["vfID"], "mac", "00:00:00:00:00:00")
- if err != nil {
- return err
- }
-
- // Ensure spoof checking is disabled if not enabled in container.
- _, err = shared.RunCommand("ip", "link", "set", "dev", m["parent"], "vf", m["vfID"], "spoofchk", "off")
- if err != nil {
- return err
- }
- }
-
- // Bind VF device onto the host so that the settings will take effect.
- err = networkDeviceBind(vfPCISlot, vfDriverPath)
- if err != nil {
- return err
- }
-
- // Wait for VF driver to be reloaded, this will remove the VF interface temporarily, and
- // it will re-appear shortly after. Unfortunately the time between sending the bind event
- // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait,
- // otherwise next steps of applying settings to interface will fail.
- err = networkDeviceBindWait(m["host_name"])
- if err != nil {
- return err
- }
-
- // If no errors, then save the volatile config.
- err = c.VolatileSet(volatile)
- if err != nil {
- return err
- }
-
- return err
-}
-
-// restoreSriovParent restores SR-IOV parent device settings when removed from a container using the
-// volatile data that was stored when the device was first added with setupSriovParent().
-func (c *containerLXC) restoreSriovParent(deviceName string, m config.Device) {
- // Volatile keys used for parent restore.
- hostNameKey := "volatile." + deviceName + ".host_name"
- vfIDKey := "volatile." + deviceName + ".last_state.vf.id"
- vfMacKey := "volatile." + deviceName + ".last_state.vf.hwaddr"
- vfVlanKey := "volatile." + deviceName + ".last_state.vf.vlan"
- vfSpoofCheckKey := "volatile." + deviceName + ".last_state.vf.spoofcheck"
-
- // Clear volatile data when function finishes.
- defer func() {
- // Volatile keys used for parent instance restore.
- createdKey := "volatile." + deviceName + ".last_state.created"
- mtuKey := "volatile." + deviceName + ".last_state.mtu"
- macKey := "volatile." + deviceName + ".last_state.hwaddr"
-
- volatile := map[string]string{
- createdKey: "",
- mtuKey: "",
- macKey: "",
- hostNameKey: "",
- vfIDKey: "",
- vfMacKey: "",
- vfVlanKey: "",
- vfSpoofCheckKey: "",
- }
-
- err := c.VolatileSet(volatile)
- if err != nil {
- logger.Errorf("Failed to remove volatile config for %s: %v", deviceName, err)
- }
- }()
-
- // Nothing to do if we don't know the original device name or the VF ID.
- if c.localConfig[hostNameKey] == "" || c.localConfig[vfIDKey] == "" || m["parent"] == "" {
- return
- }
-
- // Get VF device's PCI Slot Name so we can unbind and rebind it from the host.
- vfPCISlot, err := networkGetVFDevicePCISlot(m["parent"], c.localConfig[vfIDKey])
- if err != nil {
- logger.Errorf("Failed to get sriov VF PCI slot \"%s.%s\": %v", m["parent"], c.localConfig[vfIDKey], err)
- return
- }
-
- // Get the path to the VF device's driver now, as once it is unbound we won't be able to
- // determine its driver path in order to rebind it.
- vfDriverPath, err := networkGetVFDeviceDriverPath(m["parent"], c.localConfig[vfIDKey])
- if err != nil {
- logger.Errorf("Failed to get sriov VF driver path \"%s.%s\": %v", m["parent"], c.localConfig[vfIDKey], err)
- return
- }
-
- // Unbind VF device from the host so that the settings will take effect when we rebind it.
- err = networkDeviceUnbind(vfPCISlot, vfDriverPath)
- if err != nil {
- return
- }
-
- // However we return from this function, we must try to rebind the VF so its not orphaned.
- // The OS won't let an already bound device be bound again so is safe to call twice.
- defer networkDeviceBind(vfPCISlot, vfDriverPath)
-
- // Reset VF VLAN if specified
- if c.localConfig[vfVlanKey] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", m["parent"], "vf", c.localConfig[vfIDKey], "vlan", c.localConfig[vfVlanKey])
- if err != nil {
- logger.Errorf("Failed to restore sriov VF \"%s.%s\" vlan to \"%s\": %v", m["parent"], c.localConfig[vfIDKey], c.localConfig[vfVlanKey], err)
- return
- }
- }
-
- // Reset VF MAC spoofing protection if recorded. Do this first before resetting the MAC
- // to avoid any issues with zero MACs refusing to be set whilst spoof check is on.
- if c.localConfig[vfSpoofCheckKey] != "" {
- mode := "off"
- if shared.IsTrue(c.localConfig[vfSpoofCheckKey]) {
- mode = "on"
- }
-
- _, err := shared.RunCommand("ip", "link", "set", "dev", m["parent"], "vf", c.localConfig[vfIDKey], "spoofchk", mode)
- if err != nil {
- logger.Errorf("Failed to restore sriov VF \"%s.%s\" spoofchk mode to \"%s\": %v", m["parent"], c.localConfig[vfIDKey], mode, err)
- return
- }
- }
-
- // Reset VF MAC specified if specified.
- if c.localConfig[vfMacKey] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", m["parent"], "vf", c.localConfig[vfIDKey], "mac", c.localConfig[vfMacKey])
- if err != nil {
- logger.Errorf("Failed to restore sriov VF \"%s.%s\" mac to \"%s\": %v", m["parent"], c.localConfig[vfIDKey], c.localConfig[vfMacKey], err)
- return
- }
- }
-
- // Bind VF device onto the host so that the settings will take effect.
- err = networkDeviceBind(vfPCISlot, vfDriverPath)
- if err != nil {
- logger.Errorf("Failed to bind sriov VF \"%s.%s\": %v", m["parent"], c.localConfig[vfIDKey], err)
- return
- }
-
- // Wait for VF driver to be reloaded, this will remove the VF interface from the container
- // and it will re-appear on the host. Unfortunately the time between sending the bind event
- // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait,
- // otherwise next step of restoring MAC and MTU settings in restorePhysicalNic will fail.
- err = networkDeviceBindWait(c.localConfig[hostNameKey])
- if err != nil {
- logger.Errorf("Failed to bind wait sriov VF \"%s.%s\": %v", m["parent"], c.localConfig[vfIDKey], err)
- return
- }
-
- // Restore VF interface settings.
- err = c.restorePhysicalNic(deviceName, c.localConfig[hostNameKey])
- if err != nil {
- logger.Errorf("%v", err)
- return
- }
-}
-
-func (c *containerLXC) Start(stateful bool) error {
- var ctxMap log.Ctx
-
- // Setup a new operation
- op, err := c.createOperation("start", false, false)
- if err != nil {
- return errors.Wrap(err, "Create container start operation")
- }
- defer op.Done(nil)
-
- err = setupSharedMounts()
- if err != nil {
- return fmt.Errorf("Daemon failed to setup shared mounts base: %s.\nDoes security.nesting need to be turned on?", err)
- }
-
- // Run the shared start code
- configPath, err := c.startCommon()
- if err != nil {
- return errors.Wrap(err, "Common start logic")
- }
-
- // Ensure that the container storage volume is mounted.
- _, err = c.StorageStart()
- if err != nil {
- return errors.Wrap(err, "Storage start")
- }
-
- ctxMap = log.Ctx{
- "project": c.project,
- "name": c.name,
- "action": op.action,
- "created": c.creationDate,
- "ephemeral": c.ephemeral,
- "used": c.lastUsedDate,
- "stateful": stateful}
-
- logger.Info("Starting container", ctxMap)
+ logger.Info("Starting container", ctxMap)
// If stateful, restore now
if stateful {
@@ -3584,9 +3228,6 @@ func (c *containerLXC) OnStop(target string) error {
logger.Error("Failed to set container state", log.Ctx{"container": c.Name(), "err": err})
}
- // Clean up networking veth devices
- c.cleanupHostVethDevices()
-
go func(c *containerLXC, target string, op *lxcContainerOperation) {
c.fromHook = false
err = nil
@@ -3636,53 +3277,6 @@ func (c *containerLXC) OnStop(target string) error {
return nil
}
-// cleanupHostVethDevices removes host side configuration for veth devices.
-func (c *containerLXC) cleanupHostVethDevices() {
- for _, k := range c.expandedDevices.DeviceNames() {
- m := c.expandedDevices[k]
- if m["type"] != "nic" || !shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- continue
- }
-
- m, err := c.fillNetworkDevice(k, m)
- if err != nil {
- logger.Error("Failed to cleanup veth device: ", log.Ctx{"container": c.Name(), "device": k, "err": err})
- continue
- }
-
- c.cleanupHostVethDevice(k, m)
- }
-}
-
-func (c *containerLXC) cleanupHostVethDevice(deviceName string, m config.Device) {
- // If not configured, check if volatile data contains the most recently added host_name.
- if m["host_name"] == "" {
- m["host_name"] = c.getVolatileHostName(deviceName)
- }
-
- // Check whether host device resolution succeeded.
- if m["host_name"] == "" {
- logger.Error("Failed to cleanup veth device: ", log.Ctx{"container": c.Name(), "device": deviceName, "err": fmt.Errorf("host_name not set")})
- }
-
- // Remove any filters
- if m["nictype"] == "bridged" {
- c.removeNetworkFilters(deviceName, m)
- }
-
- // Remove any static host side veth routes
- if shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- c.removeNetworkRoutes(deviceName, m)
-
- // Remove volatile host_name for device
- hostNameKey := fmt.Sprintf("volatile.%s.host_name", deviceName)
- err := c.VolatileSet(map[string]string{hostNameKey: ""})
- if err != nil {
- logger.Error("Failed to cleanup veth device: ", log.Ctx{"container": c.Name(), "device": deviceName, "err": err})
- }
- }
-}
-
// cleanupNetworkDevices performs any needed network device cleanup steps when container is stopped.
func (c *containerLXC) cleanupNetworkDevices(netns string) {
for _, k := range c.expandedDevices.DeviceNames() {
@@ -3691,127 +3285,12 @@ func (c *containerLXC) cleanupNetworkDevices(netns string) {
continue
}
- // Currently liblxc does not move devices back to the host on stop that were added
- // after the the container was started. For this reason we utilise the lxc.hook.stop
- // hook so that we can capture the netns path, enter the namespace and move the
- // physical and sriov nics back to the host and rename them.
- if shared.StringInSlice(m["nictype"], []string{"physical", "sriov"}) {
- hostName := c.localConfig[fmt.Sprintf("volatile.%s.host_name", k)]
- if hostName != "" && netns != "" && !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", hostName)) {
- err := c.detachInterfaceRename(netns, k, hostName)
- if err != nil {
- logger.Errorf("Failed to detach interface: %s to %s: %v", k, hostName, err)
- return
- }
- }
- }
-
- // Restore physical parent devices
- if m["nictype"] == "physical" {
- c.restorePhysicalParent(k, m)
- }
-
- // Restore sriov parent devices
- if m["nictype"] == "sriov" {
- c.restoreSriovParent(k, m)
- }
- }
-}
-
-// OnNetworkUp is called by the LXD callhook when the LXC network up script is run.
-func (c *containerLXC) OnNetworkUp(deviceName string, hostName string) error {
- device := c.expandedDevices[deviceName]
-
- // This hook is only for bridged and p2p nics currently.
- if !shared.StringInSlice(device["nictype"], []string{"bridged", "p2p"}) {
- return nil
- }
-
- // Record boot time host name of nic into volatile for use with routes/limits updates later.
- // Only need to do this if host_name is not specified in nic config.
- if device["host_name"] == "" {
- device["host_name"] = hostName
- hostNameKey := fmt.Sprintf("volatile.%s.host_name", deviceName)
- err := c.VolatileSet(map[string]string{hostNameKey: hostName})
- if err != nil {
- return err
- }
- }
-
- _, err := c.setupHostVethDevice(deviceName, device, config.Device{})
-
- return err
-}
-
-// setupHostVethDevice configures a nic device's host side veth settings.
-func (c *containerLXC) setupHostVethDevice(deviceName string, device config.Device, oldDevice config.Device) ([]string, error) {
- bounceInterfaces := []string{} // A place to store interfaces we would like to be bounced.
-
- // If not configured, check if volatile data contains the most recently added host_name.
- if device["host_name"] == "" {
- device["host_name"] = c.getVolatileHostName(deviceName)
- }
-
- // If not configured, check if volatile data contains the most recently added hwaddr.
- if device["hwaddr"] == "" {
- device["hwaddr"] = c.getVolatileHwaddr(deviceName)
- }
-
- // If not configured, copy the volatile host_name into old device to support live updates.
- if oldDevice["host_name"] == "" {
- oldDevice["host_name"] = c.getVolatileHostName(deviceName)
- }
-
- // If not configured, copy the volatile host_name into old device to support live updates.
- if oldDevice["hwaddr"] == "" {
- oldDevice["hwaddr"] = c.getVolatileHwaddr(deviceName)
- }
-
- // Check whether host device resolution succeeded.
- if device["host_name"] == "" {
- return bounceInterfaces, fmt.Errorf("Failed to find host side veth name for device \"%s\"", deviceName)
- }
-
- // Remove any old network filters.
- if oldDevice["nictype"] == "bridged" && shared.IsTrue(oldDevice["security.mac_filtering"]) || shared.IsTrue(oldDevice["security.ipv4_filtering"]) || shared.IsTrue(oldDevice["security.ipv6_filtering"]) {
- c.removeNetworkFilters(deviceName, oldDevice)
- }
-
- // Setup network filters.
- if device["nictype"] == "bridged" && shared.IsTrue(device["security.mac_filtering"]) || shared.IsTrue(device["security.ipv4_filtering"]) || shared.IsTrue(device["security.ipv6_filtering"]) {
- err := c.setNetworkFilters(deviceName, device)
+ // Use the device interface if device supports it.
+ err := c.deviceStop(k, m, netns)
if err != nil {
- return bounceInterfaces, err
+ logger.Errorf("Failed to stop device: %v", err)
}
}
-
- // Refresh tc limits.
- err := c.setNetworkLimits(device)
- if err != nil {
- return bounceInterfaces, err
- }
-
- if shared.StringInSlice(oldDevice["nictype"], []string{"bridged", "p2p"}) {
- // Remove any old routes that were setup for this nic device.
- c.removeNetworkRoutes(deviceName, oldDevice)
- }
-
- // Setup static routes to container.
- err = c.setNetworkRoutes(device)
- if err != nil {
- return bounceInterfaces, err
- }
-
- // If an IPv6 address has changed, flush all existing IPv6 leases for container.
- if device["nictype"] == "bridged" && oldDevice["nictype"] == "bridged" && device["ipv6.address"] != oldDevice["ipv6.address"] {
- networkClearLease(c.Name(), device["parent"], device["hwaddr"], clearLeaseIPv6Only)
-
- // Queue the interface to be bounched once dnsmasq config has been reloaded to give
- // container a chance to detect the change and re-apply for the updated leases.
- bounceInterfaces = append(bounceInterfaces, device["host_name"])
- }
-
- return bounceInterfaces, nil
}
// Freezer functions
@@ -4414,20 +3893,15 @@ func (c *containerLXC) Delete() error {
return err
}
- // Update network files
+ // Remove NIC devices from container.
for k, m := range c.expandedDevices {
- if m["type"] != "nic" || m["nictype"] != "bridged" {
- continue
- }
-
- m, err := c.fillNetworkDevice(k, m)
- if err != nil {
+ if m["type"] != "nic" {
continue
}
- err = networkClearLease(c.name, m["parent"], m["hwaddr"], clearLeaseAll)
+ err = c.deviceRemove(k, m)
if err != nil {
- logger.Error("Failed to delete DHCP lease", log.Ctx{"name": c.Name(), "err": err, "device": k, "hwaddr": m["hwaddr"]})
+ return err
}
}
}
@@ -4452,13 +3926,6 @@ func (c *containerLXC) Delete() error {
}
}
- if !c.IsSnapshot() {
- // Remove any static lease file *after* container config has been removed from cluster.
- // This ordering is important, as if it is done earlier than c.state.Cluster.ContainerRemove
- // then the static host config is re-created and left after the container is deleted.
- networkUpdateStatic(c.state, "")
- }
-
logger.Info("Deleted container", ctxMap)
if c.IsSnapshot() {
@@ -4989,7 +4456,28 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
}
// Diff the devices
- removeDevices, addDevices, updateDevices, updateDiff := oldExpandedDevices.Update(c.expandedDevices)
+ removeDevices, addDevices, updateDevices, updateDiff := oldExpandedDevices.Update(c.expandedDevices, func(oldDevice config.Device, newDevice config.Device) []string {
+ // This function needs to return a list of fields that are excluded from differences
+ // between oldDevice and newDevice. The result of this is that as long as the
+ // devices are otherwise identical except for the fields returned here, then the
+ // device is considered to be "updated" rather than "added & removed".
+ if oldDevice["type"] != newDevice["type"] || oldDevice["nictype"] != newDevice["nictype"] {
+ return []string{} // Device types aren't the same, so this cannot be an update.
+ }
+
+ d, err := device.New(c, c.state, config.Device(newDevice), nil, nil)
+ if err != device.ErrUnsupportedDevType {
+ if err != nil {
+ return []string{} // Couldn't create Device, so this cannot be an update.
+ }
+
+ _, updateFields := d.CanHotPlug()
+ return updateFields
+ }
+
+ // No fields can be live updated for this device.
+ return []string{}
+ })
// Do some validation of the config diff
err = containerValidConfig(c.state.OS, c.expandedConfig, false, true)
@@ -5135,8 +4623,11 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
}
}
- // Interfaces to bring down then up when changing dnsmasq config.
- var bounceInterfaces []string
+ // Use the device interface to apply update changes.
+ err = c.updateDevices(removeDevices, addDevices, updateDevices, oldExpandedDevices)
+ if err != nil {
+ return err
+ }
// Apply the live changes
if isRunning {
@@ -5436,12 +4927,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
if err != nil {
return err
}
- } else if m["type"] == "nic" || m["type"] == "infiniband" {
- err = c.removeNetworkDevice(k, m)
- if err != nil {
- return err
- }
-
+ } else if m["type"] == "infiniband" {
err = c.removeInfinibandDevices(k, m)
if err != nil {
return err
@@ -5551,7 +5037,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
}
} else if m["type"] == "disk" && m["path"] != "/" {
diskDevices[k] = m
- } else if m["type"] == "nic" || m["type"] == "infiniband" {
+ } else if m["type"] == "infiniband" {
var err error
var infiniband map[string]IBF
if m["type"] == "infiniband" {
@@ -5561,11 +5047,6 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
}
}
- m, err = c.insertNetworkDevice(k, m)
- if err != nil {
- return err
- }
-
// Plugin in all character devices
if m["type"] == "infiniband" {
key := m["parent"]
@@ -5687,11 +5168,6 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
for k, m := range updateDevices {
if m["type"] == "disk" {
updateDiskLimit = true
- } else if m["type"] == "nic" {
- bounceInterfaces, err = c.updateNetworkDevice(k, m, oldExpandedDevices[k])
- if err != nil {
- return err
- }
} else if m["type"] == "proxy" {
err = c.updateProxyDevice(k, m)
if err != nil {
@@ -5828,37 +5304,6 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
}
}
- // Update network leases if a bridged device has changed.
- needsUpdate := false
- deviceLists := []map[string]config.Device{removeDevices, addDevices, updateDevices}
- for _, deviceList := range deviceLists {
- for _, m := range deviceList {
- if m["type"] == "nic" && m["nictype"] == "bridged" {
- needsUpdate = true
- break
- }
- }
- }
-
- if needsUpdate {
- networkUpdateStatic(c.state, "")
-
- // After dnsmasq config is updated and reloaded, reset any interfaces requested,
- // this will cause the container to have a carrier lost/carrier gained event, and
- // some DHCP clients (such as systemd) will then re-apply for their leases allowing
- // static IP changes to take effect.
- for _, hostName := range bounceInterfaces {
- _, err := shared.RunCommand("ip", "link", "set", hostName, "down")
- if err != nil {
- return err
- }
- _, err = shared.RunCommand("ip", "link", "set", hostName, "up")
- if err != nil {
- return err
- }
- }
- }
-
// Send devlxd notifications
if isRunning {
// Config changes (only for user.* keys
@@ -5937,6 +5382,59 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
return nil
}
+func (c *containerLXC) updateDevices(removeDevices map[string]config.Device, addDevices map[string]config.Device, updateDevices map[string]config.Device, oldExpandedDevices config.Devices) error {
+ isRunning := c.IsRunning()
+
+ for k, m := range removeDevices {
+ if m["type"] != "nic" {
+ continue
+ }
+
+ if isRunning {
+ err := c.deviceStop(k, m, "")
+ if err != nil {
+ return err
+ }
+ }
+
+ err := c.deviceRemove(k, m)
+ if err != nil {
+ return err
+ }
+ }
+
+ for k, m := range addDevices {
+ if m["type"] != "nic" {
+ continue
+ }
+
+ err := c.deviceAdd(k, m)
+ if err != nil {
+ return err
+ }
+
+ if isRunning {
+ _, err := c.deviceStart(k, m, isRunning)
+ if err != nil {
+ return err
+ }
+ }
+ }
+
+ for k, m := range updateDevices {
+ if m["type"] != "nic" {
+ continue
+ }
+
+ err := c.deviceUpdate(k, m, oldExpandedDevices[k], isRunning)
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
func (c *containerLXC) Export(w io.Writer, properties map[string]string) error {
ctxMap := log.Ctx{
"project": c.project,
@@ -8298,248 +7796,6 @@ func (c *containerLXC) restartProxyDevices() error {
return nil
}
-// Network device handling
-func (c *containerLXC) createNetworkDevice(name string, m config.Device) (string, error) {
- var dev, n1 string
-
- if shared.StringInSlice(m["nictype"], []string{"bridged", "p2p", "macvlan"}) {
- // Host Virtual NIC name
- if m["host_name"] != "" {
- n1 = m["host_name"]
- } else {
- n1 = device.NetworkRandomDevName("veth")
- }
- }
-
- if m["nictype"] == "sriov" {
- dev = m["host_name"]
- }
-
- // Handle bridged and p2p
- if shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- n2 := device.NetworkRandomDevName("veth")
-
- _, err := shared.RunCommand("ip", "link", "add", "dev", n1, "type", "veth", "peer", "name", n2)
- if err != nil {
- return "", fmt.Errorf("Failed to create the veth interface: %s", err)
- }
-
- _, err = shared.RunCommand("ip", "link", "set", "dev", n1, "up")
- if err != nil {
- return "", fmt.Errorf("Failed to bring up the veth interface %s: %s", n1, err)
- }
-
- if m["nictype"] == "bridged" {
- err = device.NetworkAttachInterface(m["parent"], n1)
- if err != nil {
- device.NetworkRemoveInterface(n2)
- return "", fmt.Errorf("Failed to add interface to bridge: %s", err)
- }
-
- // Attempt to disable router advertisement acceptance
- device.NetworkSysctlSet(fmt.Sprintf("ipv6/conf/%s/accept_ra", n1), "0")
- }
-
- // Record the new device's host name for use in setupHostVethDevice()
- hostNameKey := fmt.Sprintf("volatile.%s.host_name", name)
- c.localConfig[hostNameKey] = n1
-
- dev = n2
- }
-
- // Handle physical and macvlan
- if shared.StringInSlice(m["nictype"], []string{"macvlan", "physical"}) {
- // Deal with VLAN on parent
- device, err := c.setupPhysicalParent(name, m)
- if err != nil {
- return "", err
- }
-
- // Handle physical
- if m["nictype"] == "physical" {
- dev = device
- }
-
- // Handle macvlan
- if m["nictype"] == "macvlan" {
- _, err := shared.RunCommand("ip", "link", "add", "dev", n1, "link", device, "type", "macvlan", "mode", "bridge")
- if err != nil {
- return "", fmt.Errorf("Failed to create the new macvlan interface: %s", err)
- }
-
- dev = n1
- }
- }
-
- // Set the MAC address
- if m["hwaddr"] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", dev, "address", m["hwaddr"])
- if err != nil {
- device.NetworkRemoveInterface(dev)
- return "", fmt.Errorf("Failed to set the MAC address: %s", err)
- }
- }
-
- // Set the MTU
- if m["mtu"] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", dev, "mtu", m["mtu"])
- if err != nil {
- device.NetworkRemoveInterface(dev)
- return "", fmt.Errorf("Failed to set the MTU: %s", err)
- }
- }
-
- // Bring the interface up
- _, err := shared.RunCommand("ip", "link", "set", "dev", dev, "up")
- if err != nil {
- device.NetworkRemoveInterface(dev)
- return "", fmt.Errorf("Failed to bring up the interface: %s", err)
- }
-
- return dev, nil
-}
-
-func (c *containerLXC) fillSriovNetworkDevice(name string, m config.Device, reserved []string) (config.Device, error) {
- if m["nictype"] != "sriov" {
- return m, nil
- }
-
- if m["parent"] == "" {
- return nil, fmt.Errorf("Missing parent for 'sriov' nic '%s'", name)
- }
-
- newDevice := config.Device{}
- err := shared.DeepCopy(&m, &newDevice)
- if err != nil {
- return nil, err
- }
-
- if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", m["parent"])) {
- return nil, fmt.Errorf("Parent device '%s' doesn't exist", m["parent"])
- }
- sriovNumVFs := fmt.Sprintf("/sys/class/net/%s/device/sriov_numvfs", m["parent"])
- sriovTotalVFs := fmt.Sprintf("/sys/class/net/%s/device/sriov_totalvfs", m["parent"])
-
- // verify that this is indeed a SR-IOV enabled device
- if !shared.PathExists(sriovTotalVFs) {
- return nil, fmt.Errorf("Parent device '%s' doesn't support SR-IOV", m["parent"])
- }
-
- // get number of currently enabled VFs
- sriovNumVfsBuf, err := ioutil.ReadFile(sriovNumVFs)
- if err != nil {
- return nil, err
- }
- sriovNumVfsStr := strings.TrimSpace(string(sriovNumVfsBuf))
- sriovNum, err := strconv.Atoi(sriovNumVfsStr)
- if err != nil {
- return nil, err
- }
-
- // get number of possible VFs
- sriovTotalVfsBuf, err := ioutil.ReadFile(sriovTotalVFs)
- if err != nil {
- return nil, err
- }
- sriovTotalVfsStr := strings.TrimSpace(string(sriovTotalVfsBuf))
- sriovTotal, err := strconv.Atoi(sriovTotalVfsStr)
- if err != nil {
- return nil, err
- }
-
- // Ensure parent is up (needed for Intel at least)
- _, err = shared.RunCommand("ip", "link", "set", "dev", m["parent"], "up")
- if err != nil {
- return nil, err
- }
-
- // Check if any VFs are already enabled
- nicName := ""
- vfID := 0
- for i := 0; i < sriovNum; i++ {
- if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", m["parent"], i)) {
- continue
- }
-
- // Check if VF is already in use
- empty, err := shared.PathIsEmpty(fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", m["parent"], i))
- if err != nil {
- return nil, err
- }
- if empty {
- continue
- }
-
- vf := fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", m["parent"], i)
- ents, err := ioutil.ReadDir(vf)
- if err != nil {
- return nil, err
- }
-
- for _, ent := range ents {
- // another nic device entry called dibs
- if shared.StringInSlice(ent.Name(), reserved) {
- continue
- }
-
- nicName = ent.Name()
- vfID = i
- break
- }
-
- // found a free one
- if nicName != "" {
- break
- }
- }
-
- if nicName == "" && m["type"] != "infiniband" {
- if sriovNum == sriovTotal {
- return nil, fmt.Errorf("All virtual functions of sriov device '%s' seem to be in use", m["parent"])
- }
-
- // bump the number of VFs to the maximum
- err := ioutil.WriteFile(sriovNumVFs, []byte(sriovTotalVfsStr), 0644)
- if err != nil {
- return nil, err
- }
-
- // use next free VF index
- for i := sriovNum + 1; i < sriovTotal; i++ {
- vf := fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", m["parent"], i)
- ents, err := ioutil.ReadDir(vf)
- if err != nil {
- return nil, err
- }
-
- if len(ents) != 1 {
- return nil, fmt.Errorf("Failed to determine unique device name")
- }
-
- // another nic device entry called dibs
- if shared.StringInSlice(ents[0].Name(), reserved) {
- continue
- }
-
- // found a free one
- nicName = ents[0].Name()
- vfID = i
- break
- }
- }
-
- if nicName == "" {
- return nil, fmt.Errorf("All virtual functions on device \"%s\" are already in use", name)
- }
-
- newDevice["host_name"] = nicName
- newDevice["vfID"] = fmt.Sprintf("%d", vfID)
- hostNameKey := fmt.Sprintf("volatile.%s.host_name", name)
- c.localConfig[hostNameKey] = nicName
-
- return newDevice, nil
-}
-
func (c *containerLXC) fillNetworkDevice(name string, m config.Device) (config.Device, error) {
newDevice := config.Device{}
err := shared.DeepCopy(&m, &newDevice)
@@ -8609,7 +7865,7 @@ func (c *containerLXC) fillNetworkDevice(name string, m config.Device) (config.D
return name, nil
}
- i += 1
+ i++
}
}
@@ -8703,543 +7959,34 @@ func (c *containerLXC) fillNetworkDevice(name string, m config.Device) (config.D
return newDevice, nil
}
-// getVolatileHostName returns the last host_name stored for a nic device.
-// Can be used when the host_name of a nic is not statically defined in config and need to find
-// out what the most recently dynamically generated one is.
-func (c *containerLXC) getVolatileHostName(deviceName string) string {
- hostNameKey := fmt.Sprintf("volatile.%s.host_name", deviceName)
- return c.localConfig[hostNameKey]
-}
-
-// getVolatileHwaddr returns the last hwaddr stored for a nic device.
-// Can be used when the hwaddr of a nic is not statically defined in config and need to find
-// out what the most recently dynamically generated one is.
-func (c *containerLXC) getVolatileHwaddr(deviceName string) string {
- hwaddrKey := fmt.Sprintf("volatile.%s.hwaddr", deviceName)
- return c.localConfig[hwaddrKey]
-}
-
-// generateNetworkFilterEbtablesRules returns a customised set of ebtables filter rules based on the device.
-func (c *containerLXC) generateNetworkFilterEbtablesRules(m config.Device, IPv4 net.IP, IPv6 net.IP) [][]string {
- // MAC source filtering rules. Blocks any packet coming from container with an incorrect Ethernet source MAC.
- // This is required for IP filtering too.
- rules := [][]string{
- {"ebtables", "-t", "filter", "-A", "INPUT", "-s", "!", m["hwaddr"], "-i", m["host_name"], "-j", "DROP"},
- {"ebtables", "-t", "filter", "-A", "FORWARD", "-s", "!", m["hwaddr"], "-i", m["host_name"], "-j", "DROP"},
- }
-
- if shared.IsTrue(m["security.ipv4_filtering"]) && IPv4 != nil {
- rules = append(rules,
- // Prevent ARP MAC spoofing (prevents the container poisoning the ARP cache of its neighbours with a MAC address that isn't its own).
- []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "ARP", "-i", m["host_name"], "--arp-mac-src", "!", m["hwaddr"], "-j", "DROP"},
- []string{"ebtables", "-t", "filter", "-A", "FORWARD", "-p", "ARP", "-i", m["host_name"], "--arp-mac-src", "!", m["hwaddr"], "-j", "DROP"},
- // Prevent ARP IP spoofing (prevents the container redirecting traffic for IPs that are not its own).
- []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "ARP", "-i", m["host_name"], "--arp-ip-src", "!", IPv4.String(), "-j", "DROP"},
- []string{"ebtables", "-t", "filter", "-A", "FORWARD", "-p", "ARP", "-i", m["host_name"], "--arp-ip-src", "!", IPv4.String(), "-j", "DROP"},
- // Allow DHCPv4 to the host only. This must come before the IP source filtering rules below.
- []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv4", "-s", m["hwaddr"], "-i", m["host_name"], "--ip-src", "0.0.0.0", "--ip-dst", "255.255.255.255", "--ip-proto", "udp", "--ip-dport", "67", "-j", "ACCEPT"},
- // IP source filtering rules. Blocks any packet coming from container with an incorrect IP source address.
- []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv4", "-i", m["host_name"], "--ip-src", "!", IPv4.String(), "-j", "DROP"},
- []string{"ebtables", "-t", "filter", "-A", "FORWARD", "-p", "IPv4", "-i", m["host_name"], "--ip-src", "!", IPv4.String(), "-j", "DROP"},
- )
- }
-
- if shared.IsTrue(m["security.ipv6_filtering"]) && IPv6 != nil {
- rules = append(rules,
- // Allow DHCPv6 and Router Solicitation to the host only. This must come before the IP source filtering rules below.
- []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv6", "-s", m["hwaddr"], "-i", m["host_name"], "--ip6-src", "fe80::/ffc0::", "--ip6-dst", "ff02::1:2/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "--ip6-proto", "udp", "--ip6-dport", "547", "-j", "ACCEPT"},
- []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv6", "-s", m["hwaddr"], "-i", m["host_name"], "--ip6-src", "fe80::/ffc0::", "--ip6-dst", "ff02::2/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "--ip6-proto", "ipv6-icmp", "--ip6-icmp-type", "router-solicitation", "-j", "ACCEPT"},
- // IP source filtering rules. Blocks any packet coming from container with an incorrect IP source address.
- []string{"ebtables", "-t", "filter", "-A", "INPUT", "-p", "IPv6", "-i", m["host_name"], "--ip6-src", "!", fmt.Sprintf("%s/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", IPv6.String()), "-j", "DROP"},
- []string{"ebtables", "-t", "filter", "-A", "FORWARD", "-p", "IPv6", "-i", m["host_name"], "--ip6-src", "!", fmt.Sprintf("%s/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", IPv6.String()), "-j", "DROP"},
- )
- }
-
- return rules
-}
-
-// generateNetworkFilterIptablesRules returns a customised set of iptables filter rules based on the device.
-func (c *containerLXC) generateNetworkFilterIptablesRules(m config.Device, IPv6 net.IP) (rules [][]string, err error) {
- mac, err := net.ParseMAC(m["hwaddr"])
- if err != nil {
- return
- }
-
- macHex := hex.EncodeToString(mac)
-
- // These rules below are implemented using ip6tables because the functionality to inspect
- // the contents of an ICMPv6 packet does not exist in ebtables (unlike for IPv4 ARP).
- // Additionally, ip6tables doesn't really provide a nice way to do what we need here, so we
- // have resorted to doing a raw hex comparison of the packet contents at fixed positions.
- // If these rules are not added then it is possible to hijack traffic for another IP that is
- // not assigned to the container by sending a specially crafted gratuitous NDP packet with
- // correct source address and MAC at the IP & ethernet layers, but a fraudulent IP or MAC
- // inside the ICMPv6 NDP packet.
- if shared.IsTrue(m["security.ipv6_filtering"]) && IPv6 != nil {
- ipv6Hex := hex.EncodeToString(IPv6)
-
- rules = append(rules,
- // Prevent Neighbor Advertisement IP spoofing (prevents the container redirecting traffic for IPs that are not its own).
- []string{"ipv6", "INPUT", "-i", m["parent"], "-p", "ipv6-icmp", "-m", "physdev", "--physdev-in", m["host_name"], "-m", "icmp6", "--icmpv6-type", "136", "-m", "string", "!", "--hex-string", fmt.Sprintf("|%s|", ipv6Hex), "--algo", "bm", "--from", "48", "--to", "64", "-j", "DROP"},
- []string{"ipv6", "FORWARD", "-i", m["parent"], "-p", "ipv6-icmp", "-m", "physdev", "--physdev-in", m["host_name"], "-m", "icmp6", "--icmpv6-type", "136", "-m", "string", "!", "--hex-string", fmt.Sprintf("|%s|", ipv6Hex), "--algo", "bm", "--from", "48", "--to", "64", "-j", "DROP"},
- // Prevent Neighbor Advertisement MAC spoofing (prevents the container poisoning the NDP cache of its neighbours with a MAC address that isn't its own).
- []string{"ipv6", "INPUT", "-i", m["parent"], "-p", "ipv6-icmp", "-m", "physdev", "--physdev-in", m["host_name"], "-m", "icmp6", "--icmpv6-type", "136", "-m", "string", "!", "--hex-string", fmt.Sprintf("|%s|", macHex), "--algo", "bm", "--from", "66", "--to", "72", "-j", "DROP"},
- []string{"ipv6", "FORWARD", "-i", m["parent"], "-p", "ipv6-icmp", "-m", "physdev", "--physdev-in", m["host_name"], "-m", "icmp6", "--icmpv6-type", "136", "-m", "string", "!", "--hex-string", fmt.Sprintf("|%s|", macHex), "--algo", "bm", "--from", "66", "--to", "72", "-j", "DROP"},
- )
- }
-
- return
-}
-
-// setNetworkFilters sets up any network level filters defined for the container.
-// These are controlled by the security.mac_filtering, security.ipv4_Filtering and security.ipv6_filtering config keys.
-func (c *containerLXC) setNetworkFilters(deviceName string, m config.Device) (err error) {
- if m["hwaddr"] == "" {
- return fmt.Errorf("Failed to set network filters: require hwaddr defined")
- }
+// Disk device handling
+func (c *containerLXC) createDiskDevice(name string, m config.Device) (string, error) {
+ // source paths
+ relativeDestPath := strings.TrimPrefix(m["path"], "/")
+ devName := fmt.Sprintf("disk.%s.%s", strings.Replace(name, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1))
+ devPath := filepath.Join(c.DevicesPath(), devName)
+ srcPath := shared.HostPath(m["source"])
- if m["host_name"] == "" {
- return fmt.Errorf("Failed to set network filters: require host_name defined")
- }
+ // Check if read-only
+ isOptional := shared.IsTrue(m["optional"])
+ isReadOnly := shared.IsTrue(m["readonly"])
+ isRecursive := shared.IsTrue(m["recursive"])
- if m["parent"] == "" {
- return fmt.Errorf("Failed to set network filters: require parent defined")
- }
+ isFile := false
+ if m["pool"] == "" {
+ isFile = !shared.IsDir(srcPath) && !deviceIsBlockdev(srcPath)
+ } else {
+ // Deal with mounting storage volumes created via the storage
+ // api. Extract the name of the storage volume that we are
+ // supposed to attach. We assume that the only syntactically
+ // valid ways of specifying a storage volume are:
+ // - <volume_name>
+ // - <type>/<volume_name>
+ // Currently, <type> must either be empty or "custom". We do not
+ // yet support container mounts.
- if shared.IsTrue(m["security.ipv6_filtering"]) {
- // Check br_netfilter is loaded and enabled for IPv6.
- sysctlVal, err := device.NetworkSysctlGet("bridge/bridge-nf-call-ip6tables")
- if err != nil || sysctlVal != "1\n" {
- return errors.Wrapf(err, "security.ipv6_filtering requires br_netfilter and sysctl net.bridge.bridge-nf-call-ip6tables=1")
- }
- }
-
- // Retrieve existing IPs, or allocate new ones if needed.
- IPv4, IPv6, err := c.allocateNetworkFilterIPs(deviceName, m)
-
- // If anything goes wrong, clean up so we don't leave orphaned rules.
- defer func() {
- if err != nil {
- c.removeNetworkFilters(deviceName, m)
- }
- }()
-
- rules := c.generateNetworkFilterEbtablesRules(m, IPv4, IPv6)
- for _, rule := range rules {
- _, err = shared.RunCommand(rule[0], rule[1:]...)
- if err != nil {
- return err
- }
- }
-
- rules, err = c.generateNetworkFilterIptablesRules(m, IPv6)
- if err != nil {
- return err
- }
-
- for _, rule := range rules {
- err = iptables.ContainerPrepend(rule[0], fmt.Sprintf("%s - %s_filtering", c.Name(), rule[0]), "filter", rule[1], rule[2:]...)
- if err != nil {
- return err
- }
- }
-
- return nil
-}
-
-// allocateNetworkFilterIPs retrieves previously allocated IPs, or allocate new ones if needed.
-func (c *containerLXC) allocateNetworkFilterIPs(deviceName string, m config.Device) (net.IP, net.IP, error) {
- var IPv4, IPv6 net.IP
-
- // Check if there is a valid static IPv4 address defined.
- if m["ipv4.address"] != "" {
- IPv4 = net.ParseIP(m["ipv4.address"])
- if IPv4 == nil {
- return IPv4, IPv6, fmt.Errorf("Invalid static IPv4 address %s", m["ipv4.address"])
- }
- }
-
- // Check if there is a valid static IPv6 address defined.
- if m["ipv6.address"] != "" {
- IPv6 = net.ParseIP(m["ipv6.address"])
- if IPv6 == nil {
- return IPv4, IPv6, fmt.Errorf("Invalid static IPv6 address %s", m["ipv6.address"])
- }
- }
-
- // Read current static IP allocation configured from dnsmasq host config (if exists).
- curIPv4, curIPv6, err := dnsmasq.DHCPStaticIPs(m["parent"], c.Name())
- if err != nil && !os.IsNotExist(err) {
- return IPv4, IPv6, err
- }
-
- n, err := networkLoadByName(c.state, m["parent"])
- if err != nil {
- return IPv4, IPv6, err
- }
- netConfig := n.Config()
-
- // If no static IPv4, then check if there is a valid volatile IPv4 address defined.
- if IPv4 == nil && curIPv4.IP != nil {
- _, subnet, err := net.ParseCIDR(netConfig["ipv4.address"])
- if err != nil {
- return IPv4, IPv6, err
- }
-
- // Check the existing volatile IP is still valid in the subnet & ranges, if not
- // then we'll need to generate a new one.
- ranges := networkDHCPv4Ranges(netConfig)
- if networkDHCPValidIP(subnet, ranges, curIPv4.IP.To4()) {
- IPv4 = curIPv4.IP.To4()
- }
- }
-
- // If no static IPv6, then check if there is a valid volatile IPv6 address defined.
- if IPv6 == nil && curIPv6.IP != nil {
- _, subnet, err := net.ParseCIDR(netConfig["ipv6.address"])
- if err != nil {
- return IPv4, IPv6, err
- }
-
- // Check the existing volatile IP is still valid in the subnet & ranges, if not
- // then we'll need to generate a new one.
- ranges := networkDHCPv6Ranges(netConfig)
- if networkDHCPValidIP(subnet, ranges, curIPv6.IP.To16()) {
- IPv6 = curIPv6.IP.To16()
- }
- }
-
- // If we need to generate either a new IPv4 or IPv6, load existing IPs used in network.
- if IPv4 == nil || IPv6 == nil {
- dnsmasq.ConfigMutex.Lock()
-
- // Get existing allocations in network.
- IPv4Allocs, IPv6Allocs, err := dnsmasq.DHCPAllocatedIPs(m["parent"])
- if err != nil {
- dnsmasq.ConfigMutex.Unlock()
- return IPv4, IPv6, err
- }
-
- // Allocate a new IPv4 address is IPv4 filtering enabled.
- if IPv4 == nil && shared.IsTrue(m["security.ipv4_filtering"]) {
- IPv4, err = networkDHCPFindFreeIPv4(IPv4Allocs, netConfig, c.Name(), m["hwaddr"])
- if err != nil {
- dnsmasq.ConfigMutex.Unlock()
- return IPv4, IPv6, err
- }
- }
-
- // Allocate a new IPv6 address is IPv6 filtering enabled.
- if IPv6 == nil && shared.IsTrue(m["security.ipv6_filtering"]) {
- IPv6, err = networkDHCPFindFreeIPv6(IPv6Allocs, netConfig, c.Name(), m["hwaddr"])
- if err != nil {
- dnsmasq.ConfigMutex.Unlock()
- return IPv4, IPv6, err
- }
- }
-
- dnsmasq.ConfigMutex.Unlock()
- }
-
- // If either IPv4 or IPv6 assigned is different than what is in dnsmasq config, rebuild config.
- if (IPv4 != nil && bytes.Compare(curIPv4.IP, IPv4.To4()) != 0) || (IPv6 != nil && bytes.Compare(curIPv6.IP, IPv6.To16()) != 0) {
- var IPv4Str, IPv6Str string
-
- if IPv4 != nil {
- IPv4Str = IPv4.String()
- }
-
- if IPv6 != nil {
- IPv6Str = IPv6.String()
- }
-
- dnsmasq.ConfigMutex.Lock()
- defer dnsmasq.ConfigMutex.Unlock()
-
- err = dnsmasq.UpdateStaticEntry(m["parent"], c.Project(), c.Name(), netConfig, m["hwaddr"], IPv4Str, IPv6Str)
- if err != nil {
- return IPv4, IPv6, err
- }
-
- err = dnsmasq.Kill(m["parent"], true)
- if err != nil {
- return IPv4, IPv6, err
- }
- }
-
- return IPv4, IPv6, nil
-}
-
-// removeNetworkFilters removes any network level filters defined for the container.
-func (c *containerLXC) removeNetworkFilters(deviceName string, m config.Device) {
- if m["hwaddr"] == "" {
- logger.Error("Failed to remove network filters", log.Ctx{"container": c.Name(), "device": deviceName, "err": "hwaddr not defined"})
- return
- }
-
- if m["host_name"] == "" {
- logger.Error("Failed to remove network filters", log.Ctx{"container": c.Name(), "device": deviceName, "err": "host_name not defined"})
- return
- }
-
- // Remove any IPv6 filters used for this container.
- err := iptables.ContainerClear("ipv6", fmt.Sprintf("%s - ipv6_filtering", c.Name()), "filter")
- if err != nil {
- logger.Error("Failed to clear ip6tables ipv6_filter rules", log.Ctx{"container": c.Name(), "device": deviceName, "err": err})
- }
-
- // Read current static IP allocation configured from dnsmasq host config (if exists).
- IPv4, IPv6, err := dnsmasq.DHCPStaticIPs(m["parent"], c.Name())
- if err != nil {
- logger.Error("Failed to remove network filters", log.Ctx{"container": c.Name(), "device": deviceName, "err": err})
- }
-
- // Get a current list of rules active on the host.
- out, err := shared.RunCommand("ebtables", "-L", "--Lmac2", "--Lx")
- if err != nil {
- logger.Error("Failed to remove network filters", log.Ctx{"container": c.Name(), "device": deviceName, "err": err})
- return
- }
-
- // Get a list of rules that we would have applied on container start.
- rules := c.generateNetworkFilterEbtablesRules(m, IPv4.IP, IPv6.IP)
-
- // Iterate through each active rule on the host and try and match it to one the LXD rules.
- for _, line := range strings.Split(out, "\n") {
- line = strings.TrimSpace(line)
- fields := strings.Fields(line)
- fieldsLen := len(fields)
-
- for _, rule := range rules {
- // Rule doesn't match if the field lenths aren't the same, move on.
- if len(rule) != fieldsLen {
- continue
- }
-
- // Check whether active rule matches one of our rules to delete.
- if !c.matchEbtablesRule(fields, rule, true) {
- continue
- }
-
- // If we get this far, then the current host rule matches one of our LXD
- // rules, so we should run the modified command to delete it.
- _, err = shared.RunCommand(fields[0], fields[1:]...)
- if err != nil {
- logger.Error("Failed to remove network filter rule", log.Ctx{"container": c.Name(), "err": err})
- }
- }
-
- }
-
- return
-}
-
-// matchEbtablesRule compares an active rule to a supplied match rule to see if they match.
-// If deleteMode is true then the "-A" flag in the active rule will be modified to "-D" and will
-// not be part of the equality match. This allows delete commands to be generated from dumped add commands.
-func (c *containerLXC) matchEbtablesRule(activeRule []string, matchRule []string, deleteMode bool) bool {
- for i := range matchRule {
- // Active rules will be dumped in "add" format, we need to detect
- // this and switch it to "delete" mode if requested. If this has already been
- // done then move on, as we don't want to break the comparison below.
- if deleteMode && (activeRule[i] == "-A" || activeRule[i] == "-D") {
- activeRule[i] = "-D"
- continue
- }
-
- // Check the match rule field matches the active rule field.
- // If they don't match, then this isn't one of our rules.
- if activeRule[i] != matchRule[i] {
- return false
- }
- }
-
- return true
-}
-
-func (c *containerLXC) insertNetworkDevice(name string, m config.Device) (config.Device, error) {
- // Load the go-lxc struct
- err := c.initLXC(false)
- if err != nil {
- return m, nil
- }
-
- // Fill in some fields from volatile
- m, err = c.fillNetworkDevice(name, m)
- if err != nil {
- return m, nil
- }
-
- if m["parent"] != "" && !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", m["parent"])) {
- return nil, fmt.Errorf("Parent device '%s' doesn't exist", m["parent"])
- }
-
- // Return empty list if not running
- if !c.IsRunning() {
- return nil, fmt.Errorf("Can't insert device into stopped container")
- }
-
- // Fill in some fields from volatile and setup the virtual function
- if m["nictype"] == "sriov" {
- m, err = c.fillSriovNetworkDevice(name, m, []string{})
- if err != nil {
- return nil, err
- }
-
- err = c.setupSriovParent(name, m)
- if err != nil {
- return nil, err
- }
- }
-
- // Block user trying to add an ipvlan nic on running container as is only supported via LXC
- if m["nictype"] == "ipvlan" {
- return nil, errors.New("Can't insert ipvlan device to running container")
- }
-
- // Create the interface
- devName, err := c.createNetworkDevice(name, m)
- if err != nil {
- return nil, err
- }
-
- // Add the interface to the container
- err = c.c.AttachInterface(devName, m["name"])
- if err != nil {
- return nil, fmt.Errorf("Failed to attach interface: %s: %s", devName, err)
- }
-
- if m["type"] == "nic" && shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- _, err = c.setupHostVethDevice(name, m, config.Device{})
- if err != nil {
- return nil, err
- }
- }
-
- return m, nil
-}
-
-// checkIPVLANSupport checks whether the liblxc available has the necessary IPVLAN features.
-func (c *containerLXC) checkIPVLANSupport() error {
- extensions := c.state.OS.LXCFeatures
- if extensions["network_ipvlan"] && extensions["network_l2proxy"] && extensions["network_gateway_device_route"] {
- return nil
- }
-
- return errors.New("LXC is missing one or more API extensions: network_ipvlan, network_l2proxy, network_gateway_device_route")
-}
-
-func (c *containerLXC) updateNetworkDevice(name string, m config.Device, oldDevice config.Device) ([]string, error) {
- if shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- // Populate network device with container nic names.
- m, err := c.fillNetworkDevice(name, m)
- if err != nil {
- return []string{}, err
- }
-
- bounceInterfaces, err := c.setupHostVethDevice(name, m, oldDevice)
- if err != nil {
- return []string{}, err
- }
-
- return bounceInterfaces, nil
- }
-
- return []string{}, nil
-}
-
-func (c *containerLXC) removeNetworkDevice(name string, m config.Device) error {
- // Fill in some fields from volatile
- m, err := c.fillNetworkDevice(name, m)
- if err != nil {
- return err
- }
-
- // Return empty list if not running
- if !c.IsRunning() {
- return fmt.Errorf("Can't remove device from stopped container")
- }
-
- // Get a temporary device name
- var hostName string
- if m["nictype"] == "physical" {
- hostName = device.NetworkGetHostDevice(m["parent"], m["vlan"])
- } else if m["nictype"] == "sriov" {
- // hostName for sriov devices can change on each boot, so get out of volatile.
- hostName = c.getVolatileHostName(name)
- } else {
- hostName = device.NetworkRandomDevName("veth")
- }
-
- // For some reason, having network config confuses detach, so get our own go-lxc struct
- cname := project.Prefix(c.Project(), c.Name())
- cc, err := lxc.NewContainer(cname, c.state.OS.LxcPath)
- if err != nil {
- return err
- }
- defer cc.Release()
-
- // Check if interface exists inside container namespace
- ifaces, err := cc.Interfaces()
- if err != nil {
- return fmt.Errorf("Failed to list network interfaces: %v", err)
- }
-
- // Remove the interface from the container if it exists
- if shared.StringInSlice(m["name"], ifaces) {
- err = cc.DetachInterfaceRename(m["name"], hostName)
- if err != nil {
- return fmt.Errorf("Failed to detach interface: %s to %s: %v", m["name"], hostName, err)
- }
- }
-
- // Remove host side veth settings and remove veth interface
- if shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- c.cleanupHostVethDevice(name, m)
- device.NetworkRemoveInterface(hostName)
- }
-
- // If physical dev, restore MTU using value recorded on parent after removal from container.
- if m["nictype"] == "physical" {
- c.restorePhysicalParent(name, m)
- }
-
- // Restore sriov parent devices
- if m["nictype"] == "sriov" {
- c.restoreSriovParent(name, m)
- }
-
- return nil
-}
-
-// Disk device handling
-func (c *containerLXC) createDiskDevice(name string, m config.Device) (string, error) {
- // source paths
- relativeDestPath := strings.TrimPrefix(m["path"], "/")
- devName := fmt.Sprintf("disk.%s.%s", strings.Replace(name, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1))
- devPath := filepath.Join(c.DevicesPath(), devName)
- srcPath := shared.HostPath(m["source"])
-
- // Check if read-only
- isOptional := shared.IsTrue(m["optional"])
- isReadOnly := shared.IsTrue(m["readonly"])
- isRecursive := shared.IsTrue(m["recursive"])
-
- isFile := false
- if m["pool"] == "" {
- isFile = !shared.IsDir(srcPath) && !deviceIsBlockdev(srcPath)
- } else {
- // Deal with mounting storage volumes created via the storage
- // api. Extract the name of the storage volume that we are
- // supposed to attach. We assume that the only syntactically
- // valid ways of specifying a storage volume are:
- // - <volume_name>
- // - <type>/<volume_name>
- // Currently, <type> must either be empty or "custom". We do not
- // yet support container mounts.
-
- if filepath.IsAbs(m["source"]) {
- return "", fmt.Errorf("When the \"pool\" property is set \"source\" must specify the name of a volume, not a path")
+ if filepath.IsAbs(m["source"]) {
+ return "", fmt.Errorf("When the \"pool\" property is set \"source\" must specify the name of a volume, not a path")
}
volumeTypeName := ""
@@ -9695,158 +8442,6 @@ func (c *containerLXC) setNetworkPriority() error {
return nil
}
-// setNetworkRoutes applies any static routes configured from the host to the container nic.
-func (c *containerLXC) setNetworkRoutes(m config.Device) error {
- if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", m["host_name"])) {
- return fmt.Errorf("Unknown or missing host side veth: %s", m["host_name"])
- }
-
- // Decide whether the route should point to the veth parent or the bridge parent
- routeDev := m["host_name"]
- if m["nictype"] == "bridged" {
- routeDev = m["parent"]
- }
-
- // Add additional IPv4 routes (using boot proto to avoid conflicts with network static routes)
- if m["ipv4.routes"] != "" {
- for _, route := range strings.Split(m["ipv4.routes"], ",") {
- route = strings.TrimSpace(route)
- _, err := shared.RunCommand("ip", "-4", "route", "add", route, "dev", routeDev, "proto", "boot")
- if err != nil {
- return err
- }
- }
- }
-
- // Add additional IPv6 routes (using boot proto to avoid conflicts with network static routes)
- if m["ipv6.routes"] != "" {
- for _, route := range strings.Split(m["ipv6.routes"], ",") {
- route = strings.TrimSpace(route)
- _, err := shared.RunCommand("ip", "-6", "route", "add", route, "dev", routeDev, "proto", "boot")
- if err != nil {
- return err
- }
- }
- }
-
- return nil
-}
-
-// removeNetworkRoutes removes any routes created for this device on the host that were first added
-// with setNetworkRoutes(). Expects to be passed the device config from the oldExpandedDevices.
-func (c *containerLXC) removeNetworkRoutes(deviceName string, m config.Device) {
- // Decide whether the route should point to the veth parent or the bridge parent
- routeDev := m["host_name"]
- if m["nictype"] == "bridged" {
- routeDev = m["parent"]
- }
-
- if m["ipv4.routes"] != "" || m["ipv6.routes"] != "" {
- if routeDev == "" {
- logger.Errorf("Failed to remove static routes as route dev isn't set")
- return
- }
-
- if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", routeDev)) {
- return //Routes will already be gone if device doesn't exist.
- }
- }
-
- // Remove IPv4 routes
- if m["ipv4.routes"] != "" {
- for _, route := range strings.Split(m["ipv4.routes"], ",") {
- route = strings.TrimSpace(route)
- _, err := shared.RunCommand("ip", "-4", "route", "flush", route, "dev", routeDev, "proto", "boot")
- if err != nil {
- logger.Errorf("Failed to remove static route: %s to %s: %s", route, routeDev, err)
- }
- }
- }
-
- // Remove IPv6 routes
- if m["ipv6.routes"] != "" {
- for _, route := range strings.Split(m["ipv6.routes"], ",") {
- route = strings.TrimSpace(route)
- _, err := shared.RunCommand("ip", "-6", "route", "flush", route, "dev", routeDev, "proto", "boot")
- if err != nil {
- logger.Errorf("Failed to remove static route: %s to %s: %s", route, routeDev, err)
- }
- }
- }
-}
-
-func (c *containerLXC) setNetworkLimits(m config.Device) error {
- var err error
- // We can only do limits on some network type
- if m["nictype"] != "bridged" && m["nictype"] != "p2p" {
- return fmt.Errorf("Network limits are only supported on bridged and p2p interfaces")
- }
-
- veth := m["host_name"]
- if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", veth)) {
- return fmt.Errorf("Unknown or missing host side veth: %s", veth)
- }
-
- // Apply max limit
- if m["limits.max"] != "" {
- m["limits.ingress"] = m["limits.max"]
- m["limits.egress"] = m["limits.max"]
- }
-
- // Parse the values
- var ingressInt int64
- if m["limits.ingress"] != "" {
- ingressInt, err = units.ParseBitSizeString(m["limits.ingress"])
- if err != nil {
- return err
- }
- }
-
- var egressInt int64
- if m["limits.egress"] != "" {
- egressInt, err = units.ParseBitSizeString(m["limits.egress"])
- if err != nil {
- return err
- }
- }
-
- // Clean any existing entry
- shared.RunCommand("tc", "qdisc", "del", "dev", veth, "root")
- shared.RunCommand("tc", "qdisc", "del", "dev", veth, "ingress")
-
- // Apply new limits
- if m["limits.ingress"] != "" {
- out, err := shared.RunCommand("tc", "qdisc", "add", "dev", veth, "root", "handle", "1:0", "htb", "default", "10")
- if err != nil {
- return fmt.Errorf("Failed to create root tc qdisc: %s", out)
- }
-
- out, err = shared.RunCommand("tc", "class", "add", "dev", veth, "parent", "1:0", "classid", "1:10", "htb", "rate", fmt.Sprintf("%dbit", ingressInt))
- if err != nil {
- return fmt.Errorf("Failed to create limit tc class: %s", out)
- }
-
- out, err = shared.RunCommand("tc", "filter", "add", "dev", veth, "parent", "1:0", "protocol", "all", "u32", "match", "u32", "0", "0", "flowid", "1:1")
- if err != nil {
- return fmt.Errorf("Failed to create tc filter: %s", out)
- }
- }
-
- if m["limits.egress"] != "" {
- out, err := shared.RunCommand("tc", "qdisc", "add", "dev", veth, "handle", "ffff:0", "ingress")
- if err != nil {
- return fmt.Errorf("Failed to create ingress tc qdisc: %s", out)
- }
-
- out, err = shared.RunCommand("tc", "filter", "add", "dev", veth, "parent", "ffff:0", "protocol", "all", "u32", "match", "u32", "0", "0", "police", "rate", fmt.Sprintf("%dbit", egressInt), "burst", "1024k", "mtu", "64kb", "drop")
- if err != nil {
- return fmt.Errorf("Failed to create ingress tc qdisc: %s", out)
- }
- }
-
- return nil
-}
-
// Various state query functions
func (c *containerLXC) IsStateful() bool {
return c.stateful
From 2a0d02775897d039f648b7e5174dc69814e22b59 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 09:00:46 +0100
Subject: [PATCH 23/32] container: NIC device validation to device interface
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container.go | 88 +++---------------------------------------------
1 file changed, 4 insertions(+), 84 deletions(-)
diff --git a/lxd/container.go b/lxd/container.go
index 8e61be6a36..c886373164 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -386,90 +386,10 @@ func containerValidDevices(cluster *db.Cluster, devices config.Devices, profile
}
if m["type"] == "nic" {
- if m["nictype"] == "" {
- return fmt.Errorf("Missing nic type")
- }
-
- if !shared.StringInSlice(m["nictype"], []string{"bridged", "macvlan", "ipvlan", "p2p", "physical", "sriov"}) {
- return fmt.Errorf("Bad nic type: %s", m["nictype"])
- }
-
- if shared.StringInSlice(m["nictype"], []string{"bridged", "macvlan", "ipvlan", "physical", "sriov"}) && m["parent"] == "" {
- return fmt.Errorf("Missing parent for %s type nic", m["nictype"])
- }
-
- if m["ipv4.address"] != "" {
- if m["nictype"] == "ipvlan" {
- err := networkValidAddressV4List(m["ipv4.address"])
- if err != nil {
- return err
- }
- } else {
- err := networkValidAddressV4(m["ipv4.address"])
- if err != nil {
- return err
- }
- }
- }
-
- if m["ipv6.address"] != "" {
- if m["nictype"] == "ipvlan" {
- err := networkValidAddressV6List(m["ipv6.address"])
- if err != nil {
- return err
- }
- } else {
- err := networkValidAddressV6(m["ipv6.address"])
- if err != nil {
- return err
- }
- }
- }
-
- if m["ipv4.routes"] != "" {
- if !shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- return fmt.Errorf("Bad nic type for ipv4.routes: %s", m["nictype"])
- }
-
- for _, route := range strings.Split(m["ipv4.routes"], ",") {
- route = strings.TrimSpace(route)
- err := networkValidNetworkV4(route)
- if err != nil {
- return err
- }
- }
- }
-
- if m["ipv6.routes"] != "" {
- if !shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
- return fmt.Errorf("Bad nic type for ipv6.routes: %s", m["nictype"])
- }
-
- for _, route := range strings.Split(m["ipv6.routes"], ",") {
- route = strings.TrimSpace(route)
- err := networkValidNetworkV6(route)
- if err != nil {
- return err
- }
- }
- }
-
- if shared.IsTrue(m["security.mac_filtering"]) {
- if !shared.StringInSlice(m["nictype"], []string{"bridged", "sriov"}) {
- return fmt.Errorf("Bad nic type for security.mac_filtering: %s", m["nictype"])
- }
- }
-
- if shared.IsTrue(m["security.ipv4_filtering"]) {
- if m["nictype"] != "bridged" {
- return fmt.Errorf("Bad nic type for security.ipv4_filtering: %s", m["nictype"])
- }
- }
-
- if shared.IsTrue(m["security.ipv6_filtering"]) {
- if m["nictype"] != "bridged" {
- return fmt.Errorf("Bad nic type for security.ipv6_filtering: %s", m["nictype"])
- }
+ // Validate config using device interface.
+ _, err := device.New(&containerLXC{}, nil, config.Device(m), nil, nil)
+ if err != nil {
+ return nil
}
} else if m["type"] == "infiniband" {
if m["nictype"] == "" {
From 5d0d62330a707a4e1096074f5e9a23cfb7f5158b Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 11:28:45 +0100
Subject: [PATCH 24/32] container: Adds state to containerValidDevices and
updates references
Required for device validation.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container.go | 6 +++---
lxd/container_lxc.go | 6 +++---
lxd/profiles.go | 2 +-
lxd/profiles_utils.go | 2 +-
4 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/lxd/container.go b/lxd/container.go
index c886373164..cca0d7d964 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -362,7 +362,7 @@ func containerValidConfig(sysOS *sys.OS, config map[string]string, profile bool,
return nil
}
-func containerValidDevices(cluster *db.Cluster, devices config.Devices, profile bool, expanded bool) error {
+func containerValidDevices(state *state.State, cluster *db.Cluster, devices config.Devices, profile bool, expanded bool) error {
// Empty device list
if devices == nil {
return nil
@@ -387,7 +387,7 @@ func containerValidDevices(cluster *db.Cluster, devices config.Devices, profile
if m["type"] == "nic" {
// Validate config using device interface.
- _, err := device.New(&containerLXC{}, nil, config.Device(m), nil, nil)
+ _, err := device.New(&containerLXC{}, state, config.Device(m), nil, nil)
if err != nil {
return nil
}
@@ -1173,7 +1173,7 @@ func containerCreateInternal(s *state.State, args db.ContainerArgs) (container,
}
// Validate container devices
- err = containerValidDevices(s.Cluster, args.Devices, false, false)
+ err = containerValidDevices(s, s.Cluster, args.Devices, false, false)
if err != nil {
return nil, errors.Wrap(err, "Invalid devices")
}
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 715701f89f..a0bbabe926 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -376,7 +376,7 @@ func containerLXCCreate(s *state.State, args db.ContainerArgs) (container, error
return nil, err
}
- err = containerValidDevices(s.Cluster, c.expandedDevices, false, true)
+ err = containerValidDevices(s, s.Cluster, c.expandedDevices, false, true)
if err != nil {
c.Delete()
logger.Error("Failed creating container", ctxMap)
@@ -4290,7 +4290,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
}
// Validate the new devices
- err = containerValidDevices(c.state.Cluster, args.Devices, false, false)
+ err = containerValidDevices(c.state, c.state.Cluster, args.Devices, false, false)
if err != nil {
return errors.Wrap(err, "Invalid devices")
}
@@ -4486,7 +4486,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
}
// Do some validation of the devices diff
- err = containerValidDevices(c.state.Cluster, c.expandedDevices, false, true)
+ err = containerValidDevices(c.state, c.state.Cluster, c.expandedDevices, false, true)
if err != nil {
return errors.Wrap(err, "Invalid expanded devices")
}
diff --git a/lxd/profiles.go b/lxd/profiles.go
index d8351ee3e3..915e382bed 100644
--- a/lxd/profiles.go
+++ b/lxd/profiles.go
@@ -105,7 +105,7 @@ func profilesPost(d *Daemon, r *http.Request) Response {
return BadRequest(err)
}
- err = containerValidDevices(d.cluster, req.Devices, true, false)
+ err = containerValidDevices(d.State(), d.cluster, req.Devices, true, false)
if err != nil {
return BadRequest(err)
}
diff --git a/lxd/profiles_utils.go b/lxd/profiles_utils.go
index 3a156802b2..77328690a2 100644
--- a/lxd/profiles_utils.go
+++ b/lxd/profiles_utils.go
@@ -18,7 +18,7 @@ func doProfileUpdate(d *Daemon, project, name string, id int64, profile *api.Pro
return err
}
- err = containerValidDevices(d.cluster, req.Devices, true, false)
+ err = containerValidDevices(d.State(), d.cluster, req.Devices, true, false)
if err != nil {
return err
}
From adfc57d0360479b07e9ce15c34bfe3f69019dfb9 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 10:54:46 +0100
Subject: [PATCH 25/32] device/utils: Moves IP validation functions from
network_utils
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_network.go | 97 ++++++++++++++++++++++++++++++
1 file changed, 97 insertions(+)
diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index 631a98d4f9..be359a006f 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -6,6 +6,7 @@ import (
"encoding/hex"
"fmt"
"io/ioutil"
+ "net"
"os"
"strconv"
"strings"
@@ -498,3 +499,99 @@ func networkSetVethLimits(m config.Device) error {
return nil
}
+
+// NetworkValidAddressV4 validates an IPv4 address string.
+func NetworkValidAddressV4(value string) error {
+ if value == "" {
+ return nil
+ }
+
+ ip := net.ParseIP(value)
+ if ip == nil || ip.To4() == nil {
+ return fmt.Errorf("Not an IPv4 address: %s", value)
+ }
+
+ return nil
+}
+
+// NetworkValidAddressV6 validates an IPv6 address string.
+func NetworkValidAddressV6(value string) error {
+ if value == "" {
+ return nil
+ }
+
+ ip := net.ParseIP(value)
+ if ip == nil || ip.To4() != nil {
+ return fmt.Errorf("Not an IPv6 address: %s", value)
+ }
+
+ return nil
+}
+
+// NetworkValidAddressV4List validates a comma delimited list of IPv4 addresses.
+func NetworkValidAddressV4List(value string) error {
+ for _, v := range strings.Split(value, ",") {
+ v = strings.TrimSpace(v)
+ err := NetworkValidAddressV4(v)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// NetworkValidAddressV6List validates a comma delimited list of IPv6 addresses.
+func NetworkValidAddressV6List(value string) error {
+ for _, v := range strings.Split(value, ",") {
+ v = strings.TrimSpace(v)
+ err := NetworkValidAddressV6(v)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// NetworkValidNetworkV4 validates an IPv4 CIDR string.
+func NetworkValidNetworkV4(value string) error {
+ if value == "" {
+ return nil
+ }
+
+ ip, subnet, err := net.ParseCIDR(value)
+ if err != nil {
+ return err
+ }
+
+ if ip.To4() == nil {
+ return fmt.Errorf("Not an IPv4 network: %s", value)
+ }
+
+ if ip.String() != subnet.IP.String() {
+ return fmt.Errorf("Not an IPv4 network address: %s", value)
+ }
+
+ return nil
+}
+
+// NetworkValidNetworkV6 validates an IPv6 CIDR string.
+func NetworkValidNetworkV6(value string) error {
+ if value == "" {
+ return nil
+ }
+
+ ip, subnet, err := net.ParseCIDR(value)
+ if err != nil {
+ return err
+ }
+
+ if ip == nil || ip.To4() != nil {
+ return fmt.Errorf("Not an IPv6 network: %s", value)
+ }
+
+ if ip.String() != subnet.IP.String() {
+ return fmt.Errorf("Not an IPv6 network address: %s", value)
+ }
+
+ return nil
+}
From ab3d2b11eebb19ce2a45a6857a945cd811cbdc40 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 10:55:11 +0100
Subject: [PATCH 26/32] network/config: Updates to use IP validation in
device_utils
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/networks_config.go | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/lxd/networks_config.go b/lxd/networks_config.go
index ae9194eaf0..bdf416498c 100644
--- a/lxd/networks_config.go
+++ b/lxd/networks_config.go
@@ -5,6 +5,7 @@ import (
"strconv"
"strings"
+ "github.com/lxc/lxd/lxd/device"
"github.com/lxc/lxd/shared"
)
@@ -32,13 +33,13 @@ var networkConfigKeys = map[string]func(value string) error{
return shared.IsOneOf(value, []string{"standard", "fan"})
},
- "fan.overlay_subnet": networkValidNetworkV4,
+ "fan.overlay_subnet": device.NetworkValidNetworkV4,
"fan.underlay_subnet": func(value string) error {
if value == "auto" {
return nil
}
- return networkValidNetworkV4(value)
+ return device.NetworkValidNetworkV4(value)
},
"fan.type": func(value string) error {
return shared.IsOneOf(value, []string{"vxlan", "ipip"})
@@ -67,9 +68,9 @@ var networkConfigKeys = map[string]func(value string) error{
"ipv4.nat.order": func(value string) error {
return shared.IsOneOf(value, []string{"before", "after"})
},
- "ipv4.nat.address": networkValidAddressV4,
+ "ipv4.nat.address": device.NetworkValidAddressV4,
"ipv4.dhcp": shared.IsBool,
- "ipv4.dhcp.gateway": networkValidAddressV4,
+ "ipv4.dhcp.gateway": device.NetworkValidAddressV4,
"ipv4.dhcp.expiry": shared.IsAny,
"ipv4.dhcp.ranges": shared.IsAny,
"ipv4.routes": shared.IsAny,
@@ -87,7 +88,7 @@ var networkConfigKeys = map[string]func(value string) error{
"ipv6.nat.order": func(value string) error {
return shared.IsOneOf(value, []string{"before", "after"})
},
- "ipv6.nat.address": networkValidAddressV6,
+ "ipv6.nat.address": device.NetworkValidAddressV6,
"ipv6.dhcp": shared.IsBool,
"ipv6.dhcp.expiry": shared.IsAny,
"ipv6.dhcp.stateful": shared.IsBool,
From 7e2f00235d5bd68d67c94327f9e5cfbc6197c2da Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 10:55:33 +0100
Subject: [PATCH 27/32] networks/utils: Removes unused IP validation functions
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/networks_utils.go | 90 -------------------------------------------
1 file changed, 90 deletions(-)
diff --git a/lxd/networks_utils.go b/lxd/networks_utils.go
index dec09fb01c..085ac97464 100644
--- a/lxd/networks_utils.go
+++ b/lxd/networks_utils.go
@@ -509,96 +509,6 @@ func networkValidAddress(value string) error {
return nil
}
-func networkValidAddressV4(value string) error {
- if value == "" {
- return nil
- }
-
- ip := net.ParseIP(value)
- if ip == nil || ip.To4() == nil {
- return fmt.Errorf("Not an IPv4 address: %s", value)
- }
-
- return nil
-}
-
-func networkValidAddressV6(value string) error {
- if value == "" {
- return nil
- }
-
- ip := net.ParseIP(value)
- if ip == nil || ip.To4() != nil {
- return fmt.Errorf("Not an IPv6 address: %s", value)
- }
-
- return nil
-}
-
-func networkValidAddressV4List(value string) error {
- for _, v := range strings.Split(value, ",") {
- v = strings.TrimSpace(v)
- err := networkValidAddressV4(v)
- if err != nil {
- return err
- }
- }
- return nil
-}
-
-func networkValidAddressV6List(value string) error {
- for _, v := range strings.Split(value, ",") {
- v = strings.TrimSpace(v)
- err := networkValidAddressV6(v)
- if err != nil {
- return err
- }
- }
- return nil
-}
-
-func networkValidNetworkV4(value string) error {
- if value == "" {
- return nil
- }
-
- ip, subnet, err := net.ParseCIDR(value)
- if err != nil {
- return err
- }
-
- if ip.To4() == nil {
- return fmt.Errorf("Not an IPv4 network: %s", value)
- }
-
- if ip.String() != subnet.IP.String() {
- return fmt.Errorf("Not an IPv4 network address: %s", value)
- }
-
- return nil
-}
-
-func networkValidNetworkV6(value string) error {
- if value == "" {
- return nil
- }
-
- ip, subnet, err := net.ParseCIDR(value)
- if err != nil {
- return err
- }
-
- if ip == nil || ip.To4() != nil {
- return fmt.Errorf("Not an IPv6 network: %s", value)
- }
-
- if ip.String() != subnet.IP.String() {
- return fmt.Errorf("Not an IPv6 network address: %s", value)
- }
-
- return nil
-}
-
func networkAddressForSubnet(subnet *net.IPNet) (net.IP, string, error) {
ifaces, err := net.Interfaces()
if err != nil {
From fe53021c388bb20281627983845bef8229f78866 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 11:02:46 +0100
Subject: [PATCH 28/32] doc: Updates container volatile keys for host_name
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
doc/containers.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/containers.md b/doc/containers.md
index c3741f416b..2d7deb5d1e 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -98,7 +98,7 @@ volatile.idmap.current | string | - | The id
volatile.idmap.next | string | - | The idmap to use next time the container starts
volatile.last\_state.idmap | string | - | Serialized container uid/gid map
volatile.last\_state.power | string | - | Container state as of last host shutdown
-volatile.\<name\>.host\_name | string | - | Network device name on the host (for nictype=bridged or nictype=p2p, or nictype=sriov)
+volatile.\<name\>.host\_name | string | - | Network device name on the host (all nictypes)
volatile.\<name\>.hwaddr | string | - | Network device MAC address (when no hwaddr property is set on the device itself)
volatile.\<name\>.last\_state.created | string | - | Whether or not the network device physical device was created ("true" or "false")
volatile.\<name\>.last\_state.mtu | string | - | Network device original MTU used when moving a physical device into a container
From fc259feb132c816dcea46461f1af5fced752f868 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 11:49:20 +0100
Subject: [PATCH 29/32] doc: Removes host_name setting from ipvlan and macvlan
as this is incorrect
It doesn't make sense to say you can specify a host_name property for macvlan and ipvlan, as there is no persistent interface on the host.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
doc/containers.md | 2 --
1 file changed, 2 deletions(-)
diff --git a/doc/containers.md b/doc/containers.md
index 2d7deb5d1e..38825ea694 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -300,7 +300,6 @@ parent | string | - | yes | -
name | string | kernel assigned | no | - | The name of the interface inside the container
mtu | integer | parent MTU | no | - | The MTU of the new interface
hwaddr | string | randomly assigned | no | - | The MAC address of the new interface
-host\_name | string | randomly assigned | no | - | The name of the interface inside the host
vlan | integer | - | no | network\_vlan | The VLAN ID to attach to
maas.subnet.ipv4 | string | - | no | maas\_network | MAAS IPv4 subnet to register the container in
maas.subnet.ipv6 | string | - | no | maas\_network | MAAS IPv6 subnet to register the container in
@@ -338,7 +337,6 @@ parent | string | - | yes | -
name | string | kernel assigned | no | - | The name of the interface inside the container
mtu | integer | parent MTU | no | - | The MTU of the new interface
hwaddr | string | randomly assigned | no | - | The MAC address of the new interface
-host\_name | string | randomly assigned | no | - | The name of the interface inside the host
ipv4.address | string | - | no | network | Comma delimited list of IPv4 static addresses to add to container
ipv6.address | string | - | no | network | Comma delimited list of IPv6 static addresses to add to container
vlan | integer | - | no | network\_vlan | The VLAN ID to attach to
From a71f26b2d15035e5a16cfcde66e9788befdbf1c5 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Wed, 24 Jul 2019 13:14:27 +0100
Subject: [PATCH 30/32] device/utils: Adds NetworkSRIOVGetFreeVFInterface
function
Used by nic sriov and infiniband sriov to find a free virtual function interface on the same device and port as the parent.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_network.go | 39 ++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index be359a006f..5e42c80c4f 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -2,6 +2,7 @@ package device
import (
"bufio"
+ "bytes"
"crypto/rand"
"encoding/hex"
"fmt"
@@ -595,3 +596,41 @@ func NetworkValidNetworkV6(value string) error {
return nil
}
+
+// NetworkSRIOVGetFreeVFInterface checks the contents of the VF directory to find a free VF
+// interface name that belongs to the same device and port as the parent.
+// Returns VF interface name or empty string if no free interface found.
+func NetworkSRIOVGetFreeVFInterface(reservedDevices map[string]struct{}, vfListPath string, pfDevID []byte, pfDevPort []byte) (string, error) {
+ ents, err := ioutil.ReadDir(vfListPath)
+ if err != nil {
+ return "", err
+ }
+
+ for _, ent := range ents {
+ // We can't use this VF interface as it is reserved by another device.
+ if _, exists := reservedDevices[ent.Name()]; exists {
+ continue
+ }
+
+ // Get VF dev_port and dev_id values.
+ vfDevPort, err := ioutil.ReadFile(fmt.Sprintf("%s/%s/dev_port", vfListPath, ent.Name()))
+ if err != nil {
+ return "", err
+ }
+
+ vfDevID, err := ioutil.ReadFile(fmt.Sprintf("%s/%s/dev_id", vfListPath, ent.Name()))
+ if err != nil {
+ return "", err
+ }
+
+ // Skip VFs if they do not relate to the same device and port as the parent PF.
+ // Some card vendors change the device ID for each port.
+ if bytes.Compare(pfDevPort, vfDevPort) != 0 || bytes.Compare(pfDevID, vfDevID) != 0 {
+ continue
+ }
+
+ return ent.Name(), nil
+ }
+
+ return "", nil
+}
From 45cc55006bd9116c3db188f9029eb1fc841ae30a Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 23 Jul 2019 17:11:43 +0100
Subject: [PATCH 31/32] container/lxc: Fixes infiniband support
Also separates the infiniband related setup code into a separate file ready for adding to the device package in the future.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container_lxc.go | 360 ++-------------------
lxd/container_lxc_infiniband.go | 550 ++++++++++++++++++++++++++++++++
2 files changed, 570 insertions(+), 340 deletions(-)
create mode 100644 lxd/container_lxc_infiniband.go
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index a0bbabe926..5bb2d95e2d 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2349,7 +2349,6 @@ func (c *containerLXC) startCommon() (string, error) {
c.removeProxyDevices()
var usbs []usbDevice
- var sriov []string
diskDevices := map[string]config.Device{}
// Create the devices
@@ -2477,21 +2476,12 @@ func (c *containerLXC) startCommon() (string, error) {
diskDevices[k] = m
}
} else if m["type"] == "nic" || m["type"] == "infiniband" {
- var err error
- var infiniband map[string]IBF
- if m["type"] == "infiniband" {
- infiniband, err = deviceLoadInfiniband()
- if err != nil {
- return "", err
- }
- }
-
networkKeyPrefix := "lxc.net"
if !util.RuntimeLiblxcVersionAtLeast(2, 1, 0) {
networkKeyPrefix = "lxc.network"
}
- m, err = c.fillNetworkDevice(k, m)
+ m, err := c.fillNetworkDevice(k, m)
if err != nil {
return "", err
}
@@ -2514,76 +2504,9 @@ func (c *containerLXC) startCommon() (string, error) {
continue
}
- networkidx := -1
- reserved := []string{}
- // Record nictype == physical devices since those won't
- // be available for nictype == sriov.
- for _, dName := range c.expandedDevices.DeviceNames() {
- m := c.expandedDevices[dName]
- if m["type"] != "nic" && m["type"] != "infiniband" {
- continue
- }
-
- if m["nictype"] != "physical" {
- continue
- }
-
- reserved = append(reserved, m["parent"])
- }
-
- for _, dName := range c.expandedDevices.DeviceNames() {
- m := c.expandedDevices[dName]
- if m["type"] != "nic" && m["type"] != "infiniband" {
- continue
- }
- networkidx++
-
- if shared.StringInSlice(dName, sriov) {
- continue
- } else {
- sriov = append(sriov, dName)
- }
-
- if m["nictype"] != "sriov" {
- continue
- }
-
- // Make sure that no one called dibs.
- reserved = append(reserved, m["host_name"])
-
- val := c.c.ConfigItem(fmt.Sprintf("%s.%d.type", networkKeyPrefix, networkidx))
- if len(val) == 0 || val[0] != "phys" {
- return "", fmt.Errorf("Network index corresponds to false network")
- }
-
- // Fill in correct name right now
- err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.link", networkKeyPrefix, networkidx), m["host_name"])
- if err != nil {
- return "", err
- }
-
- if m["type"] == "infiniband" {
- key := m["host_name"]
- ifDev, ok := infiniband[key]
- if !ok {
- return "", fmt.Errorf("Specified infiniband device \"%s\" not found", key)
- }
-
- err := c.addInfinibandDevices(dName, &ifDev, false)
- if err != nil {
- return "", err
- }
- }
- }
-
- if m["type"] == "infiniband" && m["nictype"] == "physical" {
- key := m["parent"]
- ifDev, ok := infiniband[key]
- if !ok {
- return "", fmt.Errorf("Specified infiniband device \"%s\" not found", key)
- }
-
- err := c.addInfinibandDevices(k, &ifDev, false)
+ // Start infiniband device.
+ if m["type"] == "infiniband" {
+ err := c.startInfiniband(nicID, k, m)
if err != nil {
return "", err
}
@@ -3281,14 +3204,20 @@ func (c *containerLXC) OnStop(target string) error {
func (c *containerLXC) cleanupNetworkDevices(netns string) {
for _, k := range c.expandedDevices.DeviceNames() {
m := c.expandedDevices[k]
- if m["type"] != "nic" {
- continue
+ // Use the device interface if device supports it.
+ if m["type"] == "nic" {
+ err := c.deviceStop(k, m, netns)
+ if err != nil {
+ logger.Errorf("Failed to stop device: %v", err)
+ }
}
- // Use the device interface if device supports it.
- err := c.deviceStop(k, m, netns)
- if err != nil {
- logger.Errorf("Failed to stop device: %v", err)
+ // Clean up volatile host_name.
+ if m["type"] == "infiniband" {
+ err := c.clearInfinibandVolatile(k)
+ if err != nil {
+ logger.Errorf("Failed to stop infiniband device: %v", err)
+ }
}
}
}
@@ -4928,7 +4857,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
return err
}
} else if m["type"] == "infiniband" {
- err = c.removeInfinibandDevices(k, m)
+ err = c.removeInfinibandDevice(k, m)
if err != nil {
return err
}
@@ -5038,31 +4967,9 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
} else if m["type"] == "disk" && m["path"] != "/" {
diskDevices[k] = m
} else if m["type"] == "infiniband" {
- var err error
- var infiniband map[string]IBF
- if m["type"] == "infiniband" {
- infiniband, err = deviceLoadInfiniband()
- if err != nil {
- return err
- }
- }
-
- // Plugin in all character devices
- if m["type"] == "infiniband" {
- key := m["parent"]
- if m["nictype"] == "sriov" {
- key = m["host_name"]
- }
-
- ifDev, ok := infiniband[key]
- if !ok {
- return fmt.Errorf("Specified infiniband device \"%s\" not found", key)
- }
-
- err := c.addInfinibandDevices(k, &ifDev, true)
- if err != nil {
- return err
- }
+ err := c.addInfinibandDevice(k, m)
+ if err != nil {
+ return err
}
} else if m["type"] == "usb" {
if usbs == nil {
@@ -7294,233 +7201,6 @@ func (c *containerLXC) removeUnixDeviceNum(prefix string, m config.Device, major
return nil
}
-func (c *containerLXC) addInfinibandDevicesPerPort(deviceName string, ifDev *IBF, devices []os.FileInfo, inject bool) error {
- for _, unixCharDev := range ifDev.PerPortDevices {
- destPath := fmt.Sprintf("/dev/infiniband/%s", unixCharDev)
- relDestPath := destPath[1:]
- devPrefix := fmt.Sprintf("infiniband.unix.%s", deviceName)
-
- // Unix device
- dummyDevice := config.Device{
- "source": destPath,
- }
-
- deviceExists := false
- // only handle infiniband.unix.<device-name>.
- prefix := fmt.Sprintf("infiniband.unix.")
- for _, ent := range devices {
-
- // skip non infiniband.unix.<device-name> devices
- devName := ent.Name()
- if !strings.HasPrefix(devName, prefix) {
- continue
- }
-
- // extract the path inside the container
- idx := strings.LastIndex(devName, ".")
- if idx == -1 {
- return fmt.Errorf("Invalid infiniband device name \"%s\"", devName)
- }
- rPath := devName[idx+1:]
- rPath = strings.Replace(rPath, "-", "/", -1)
- if rPath != relDestPath {
- continue
- }
-
- deviceExists = true
- break
- }
-
- if inject && !deviceExists {
- err := c.insertUnixDevice(devPrefix, dummyDevice, false)
- if err != nil {
- return err
- }
- continue
- }
-
- paths, err := c.createUnixDevice(devPrefix, dummyDevice, false)
- if err != nil {
- return err
- }
- devPath := paths[0]
-
- if deviceExists {
- continue
- }
-
- // inform liblxc about the mount
- err = lxcSetConfigItem(c.c, "lxc.mount.entry",
- fmt.Sprintf("%s %s none bind,create=file 0 0",
- shared.EscapePathFstab(devPath),
- shared.EscapePathFstab(relDestPath)))
- if err != nil {
- return err
- }
-
- if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
- // Add the new device cgroup rule
- dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
- if err != nil {
- return err
- }
-
- err = lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor))
- if err != nil {
- return fmt.Errorf("Failed to add cgroup rule for device")
- }
- }
- }
-
- return nil
-}
-
-func (c *containerLXC) addInfinibandDevicesPerFun(deviceName string, ifDev *IBF, inject bool) error {
- for _, unixCharDev := range ifDev.PerFunDevices {
- destPath := fmt.Sprintf("/dev/infiniband/%s", unixCharDev)
- uniqueDevPrefix := fmt.Sprintf("infiniband.unix.%s", deviceName)
- relativeDestPath := fmt.Sprintf("dev/infiniband/%s", unixCharDev)
- uniqueDevName := fmt.Sprintf("%s.%s", uniqueDevPrefix, strings.Replace(relativeDestPath, "/", "-", -1))
- hostDevPath := filepath.Join(c.DevicesPath(), uniqueDevName)
-
- dummyDevice := config.Device{
- "source": destPath,
- }
-
- if inject {
- err := c.insertUnixDevice(uniqueDevPrefix, dummyDevice, false)
- if err != nil {
- return err
- }
- continue
- }
-
- // inform liblxc about the mount
- err := lxcSetConfigItem(c.c, "lxc.mount.entry", fmt.Sprintf("%s %s none bind,create=file 0 0", hostDevPath, relativeDestPath))
- if err != nil {
- return err
- }
-
- paths, err := c.createUnixDevice(uniqueDevPrefix, dummyDevice, false)
- if err != nil {
- return err
- }
- devPath := paths[0]
- if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
- // Add the new device cgroup rule
- dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
- if err != nil {
- return err
- }
-
- err = lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor))
- if err != nil {
- return fmt.Errorf("Failed to add cgroup rule for device")
- }
- }
- }
-
- return nil
-}
-
-func (c *containerLXC) addInfinibandDevices(deviceName string, ifDev *IBF, inject bool) error {
- // load all devices
- dents, err := ioutil.ReadDir(c.DevicesPath())
- if err != nil {
- if !os.IsNotExist(err) {
- return err
- }
- }
-
- err = c.addInfinibandDevicesPerPort(deviceName, ifDev, dents, inject)
- if err != nil {
- return err
- }
-
- return c.addInfinibandDevicesPerFun(deviceName, ifDev, inject)
-}
-
-func (c *containerLXC) removeInfinibandDevices(deviceName string, device config.Device) error {
- // load all devices
- dents, err := ioutil.ReadDir(c.DevicesPath())
- if err != nil {
- if !os.IsNotExist(err) {
- return err
- }
- }
-
- tmp := []string{}
- ourInfinibandDevs := []string{}
- prefix := fmt.Sprintf("infiniband.unix.")
- ourPrefix := fmt.Sprintf("infiniband.unix.%s.", deviceName)
- for _, ent := range dents {
- // skip non infiniband.unix.<device-name> devices
- devName := ent.Name()
- if !strings.HasPrefix(devName, prefix) {
- continue
- }
-
- // this is our infiniband device
- if strings.HasPrefix(devName, ourPrefix) {
- ourInfinibandDevs = append(ourInfinibandDevs, devName)
- continue
- }
-
- // this someone else's infiniband device
- tmp = append(tmp, devName)
- }
-
- residualInfinibandDevs := []string{}
- for _, peerDevName := range tmp {
- idx := strings.LastIndex(peerDevName, ".")
- if idx == -1 {
- return fmt.Errorf("Invalid infiniband device name \"%s\"", peerDevName)
- }
- rPeerPath := peerDevName[idx+1:]
- rPeerPath = strings.Replace(rPeerPath, "-", "/", -1)
- absPeerPath := fmt.Sprintf("/%s", rPeerPath)
- residualInfinibandDevs = append(residualInfinibandDevs, absPeerPath)
- }
-
- ourName := fmt.Sprintf("infiniband.unix.%s", deviceName)
- for _, devName := range ourInfinibandDevs {
- idx := strings.LastIndex(devName, ".")
- if idx == -1 {
- return fmt.Errorf("Invalid infiniband device name \"%s\"", devName)
- }
- rPath := devName[idx+1:]
- rPath = strings.Replace(rPath, "-", "/", -1)
- absPath := fmt.Sprintf("/%s", rPath)
-
- dummyDevice := config.Device{
- "path": absPath,
- }
-
- if len(residualInfinibandDevs) == 0 {
- err := c.removeUnixDevice(ourName, dummyDevice, true)
- if err != nil {
- return err
- }
- continue
- }
-
- eject := true
- for _, peerDevPath := range residualInfinibandDevs {
- if peerDevPath == absPath {
- eject = false
- break
- }
- }
-
- err := c.removeUnixDevice(ourName, dummyDevice, eject)
- if err != nil {
- return err
- }
- }
-
- return nil
-}
-
func (c *containerLXC) removeUnixDevices() error {
// Check that we indeed have devices to remove
if !shared.PathExists(c.DevicesPath()) {
diff --git a/lxd/container_lxc_infiniband.go b/lxd/container_lxc_infiniband.go
new file mode 100644
index 0000000000..908038aa3b
--- /dev/null
+++ b/lxd/container_lxc_infiniband.go
@@ -0,0 +1,550 @@
+package main
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "github.com/pkg/errors"
+
+ "github.com/lxc/lxd/lxd/device"
+ "github.com/lxc/lxd/lxd/device/config"
+ "github.com/lxc/lxd/lxd/util"
+ "github.com/lxc/lxd/shared"
+)
+// fillInfinibandSriovNetworkDevice returns m as-is unless nictype is "sriov"; for sriov it returns a copy of m whose host_name is a free VF interface of m["parent"], also recorded in c.localConfig.
+func (c *containerLXC) fillInfinibandSriovNetworkDevice(name string, m config.Device, reservedDevices map[string]struct{}) (config.Device, error) {
+ if m["nictype"] != "sriov" {
+ return m, nil
+ }
+
+ if m["parent"] == "" {
+ return nil, fmt.Errorf("Missing parent for 'sriov' nic '%s'", name)
+ }
+ // Work on a copy so that host_name is set on the returned device rather than on m.
+ newDevice := config.Device{}
+ err := shared.DeepCopy(&m, &newDevice)
+ if err != nil {
+ return nil, err
+ }
+
+ if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", m["parent"])) {
+ return nil, fmt.Errorf("Parent device '%s' doesn't exist", m["parent"])
+ }
+ sriovNumVFs := fmt.Sprintf("/sys/class/net/%s/device/sriov_numvfs", m["parent"])
+ sriovTotalVFs := fmt.Sprintf("/sys/class/net/%s/device/sriov_totalvfs", m["parent"])
+
+ // Verify that this is indeed an SR-IOV enabled device (sriov_totalvfs must exist in sysfs).
+ if !shared.PathExists(sriovTotalVFs) {
+ return nil, fmt.Errorf("Parent device '%s' doesn't support SR-IOV", m["parent"])
+ }
+
+ // Get parent dev_port and dev_id values; both are handed to NetworkSRIOVGetFreeVFInterface below.
+ pfDevPort, err := ioutil.ReadFile(fmt.Sprintf("/sys/class/net/%s/dev_port", m["parent"]))
+ if err != nil {
+ return nil, err
+ }
+
+ pfDevID, err := ioutil.ReadFile(fmt.Sprintf("/sys/class/net/%s/dev_id", m["parent"]))
+ if err != nil {
+ return nil, err
+ }
+
+ // Get the number of currently enabled VFs from sriov_numvfs.
+ sriovNumVfsBuf, err := ioutil.ReadFile(sriovNumVFs)
+ if err != nil {
+ return nil, err
+ }
+ sriovNumVfsStr := strings.TrimSpace(string(sriovNumVfsBuf))
+ sriovNum, err := strconv.Atoi(sriovNumVfsStr)
+ if err != nil {
+ return nil, err
+ }
+
+ // Walk the enabled VFs looking for one with a free interface.
+ nicName := ""
+ for i := 0; i < sriovNum; i++ {
+ if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", m["parent"], i)) {
+ continue
+ }
+
+ // Skip VFs whose net directory is empty (presumably the interface is already in use elsewhere).
+ empty, err := shared.PathIsEmpty(fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", m["parent"], i))
+ if err != nil {
+ return nil, err
+ }
+ if empty {
+ continue
+ }
+ // NetworkSRIOVGetFreeVFInterface is expected to return "" when nothing listed here is usable — TODO confirm.
+ vfListPath := fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", m["parent"], i)
+ nicName, err = device.NetworkSRIOVGetFreeVFInterface(reservedDevices, vfListPath, pfDevID, pfDevPort)
+ if err != nil {
+ return nil, err
+ }
+
+ // Found a free VF.
+ if nicName != "" {
+ break
+ }
+ }
+
+ if nicName == "" {
+ return nil, fmt.Errorf("All virtual functions on device \"%s\" are already in use", name)
+ }
+ // Record the chosen interface on the returned copy and in the container's in-memory local config.
+ newDevice["host_name"] = nicName
+ configKey := fmt.Sprintf("volatile.%s.host_name", name)
+ c.localConfig[configKey] = nicName
+
+ return newDevice, nil
+}
+// getInfinibandReserved returns the set of interface names that must not be chosen as a VF for m, built from the devices of all instances returned by device.InstanceLoadNodeAll.
+func (c *containerLXC) getInfinibandReserved(m config.Device) (map[string]struct{}, error) {
+ instances, err := device.InstanceLoadNodeAll(c.state)
+ if err != nil {
+ return nil, err
+ }
+
+ // Build a unique set of reserved network devices we cannot use.
+ reservedDevices := map[string]struct{}{}
+ for _, instance := range instances {
+ devices := instance.ExpandedDevices()
+ config := instance.ExpandedConfig()
+ for devName, devConfig := range devices {
+ // Record all parent devices, as these are not eligible for use as VFs (a device with no parent adds "").
+ parent := devConfig["parent"]
+ reservedDevices[parent] = struct{}{}
+
+ // If the device has the same parent as us, and a non-empty host_name, then
+ // mark that host_name as reserved, as that device is using it.
+ if devConfig["type"] == "infiniband" && parent == m["parent"] {
+ hostName := config[fmt.Sprintf("volatile.%s.host_name", devName)]
+ if hostName != "" {
+ reservedDevices[hostName] = struct{}{}
+ }
+ }
+ }
+ }
+
+ return reservedDevices, nil
+}
+// startInfiniband sets the liblxc network config at index networkidx for infiniband device m, adds its character devices with inject=false and stores the interface used in volatile.<deviceName>.host_name.
+func (c *containerLXC) startInfiniband(networkidx int, deviceName string, m config.Device) error {
+ infiniband, err := deviceLoadInfiniband()
+ if err != nil {
+ return err
+ }
+ // Collect the interfaces already claimed so that an sriov device can be given a free VF below.
+ reservedDevices, err := c.getInfinibandReserved(m)
+ if err != nil {
+ return err
+ }
+
+ m, err = c.fillInfinibandSriovNetworkDevice(deviceName, m, reservedDevices)
+ if err != nil {
+ return err
+ }
+
+ err = c.initLXCInfiniband(networkidx, m)
+ if err != nil {
+ return err
+ }
+ // Look up the infiniband info by the interface actually used: host_name for sriov, the parent otherwise.
+ key := m["parent"]
+ if m["nictype"] == "sriov" {
+ key = m["host_name"]
+ }
+
+ ifDev, ok := infiniband[key]
+ if !ok {
+ return fmt.Errorf("Specified infiniband device \"%s\" not found", key)
+ }
+
+ err = c.addInfinibandDevices(deviceName, &ifDev, false)
+ if err != nil {
+ return err
+ }
+
+ // It is important to save this to the DB now so that devices starting next can see that we reserved this
+ // host_name device.
+ configKey := fmt.Sprintf("volatile.%s.host_name", deviceName)
+ err = c.VolatileSet(map[string]string{configKey: key})
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+// initLXCInfiniband sets the liblxc network keys (type, flags, link, hwaddr, mtu, name) at index networkidx from m, using the "lxc.net" prefix or "lxc.network" when liblxc is older than 2.1.0.
+func (c *containerLXC) initLXCInfiniband(networkidx int, m config.Device) error {
+ networkKeyPrefix := "lxc.net"
+ if !util.RuntimeLiblxcVersionAtLeast(2, 1, 0) {
+ networkKeyPrefix = "lxc.network"
+ }
+ // physical and sriov devices are both configured as liblxc type "phys"; other nictypes get no type key here.
+ if m["nictype"] == "physical" || m["nictype"] == "sriov" {
+ err := lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.type", networkKeyPrefix, networkidx), "phys")
+ if err != nil {
+ return err
+ }
+ }
+
+ err := lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.flags", networkKeyPrefix, networkidx), "up")
+ if err != nil {
+ return err
+ }
+ // Link: derived from parent and vlan for physical, m["host_name"] for sriov.
+ if m["nictype"] == "physical" {
+ err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.link", networkKeyPrefix, networkidx), device.NetworkGetHostDevice(m["parent"], m["vlan"]))
+ if err != nil {
+ return err
+ }
+ } else if m["nictype"] == "sriov" {
+ err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.link", networkKeyPrefix, networkidx), m["host_name"])
+ if err != nil {
+ return err
+ }
+ }
+
+ // MAC address (only set when configured)
+ if m["hwaddr"] != "" {
+ err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.hwaddr", networkKeyPrefix, networkidx), m["hwaddr"])
+ if err != nil {
+ return err
+ }
+ }
+
+ // MTU (only set when configured)
+ if m["mtu"] != "" {
+ err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.mtu", networkKeyPrefix, networkidx), m["mtu"])
+ if err != nil {
+ return err
+ }
+ }
+
+ // Name (only set when configured)
+ if m["name"] != "" {
+ err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.name", networkKeyPrefix, networkidx), m["name"])
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+// addInfinibandDevicesPerPort handles ifDev.PerPortDevices: with inject it inserts devices not already listed in devices, otherwise it creates them and adds liblxc mount/cgroup config unless already listed.
+func (c *containerLXC) addInfinibandDevicesPerPort(deviceName string, ifDev *IBF, devices []os.FileInfo, inject bool) error {
+ for _, unixCharDev := range ifDev.PerPortDevices {
+ destPath := fmt.Sprintf("/dev/infiniband/%s", unixCharDev)
+ relDestPath := destPath[1:]
+ devPrefix := fmt.Sprintf("infiniband.unix.%s", deviceName)
+
+ // Unix device
+ dummyDevice := config.Device{
+ "source": destPath,
+ }
+ // Look for an existing entry, from any infiniband device, that maps to the same container path.
+ deviceExists := false
+ // only handle infiniband.unix.<device-name>.
+ prefix := fmt.Sprintf("infiniband.unix.")
+ for _, ent := range devices {
+
+ // skip non infiniband.unix.<device-name> devices
+ devName := ent.Name()
+ if !strings.HasPrefix(devName, prefix) {
+ continue
+ }
+
+ // extract the path inside the container: the part after the last ".", with "-" turned back into "/"
+ idx := strings.LastIndex(devName, ".")
+ if idx == -1 {
+ return fmt.Errorf("Invalid infiniband device name \"%s\"", devName)
+ }
+ rPath := devName[idx+1:]
+ rPath = strings.Replace(rPath, "-", "/", -1)
+ if rPath != relDestPath {
+ continue
+ }
+
+ deviceExists = true
+ break
+ }
+ // When injecting, only insert if no existing entry already maps to this path.
+ if inject && !deviceExists {
+ err := c.insertUnixDevice(devPrefix, dummyDevice, false)
+ if err != nil {
+ return err
+ }
+ continue
+ }
+ // Otherwise create the device entry; paths[0] is used below as the mount source.
+ paths, err := c.createUnixDevice(devPrefix, dummyDevice, false)
+ if err != nil {
+ return err
+ }
+ devPath := paths[0]
+ // An existing entry already maps to this path, so no mount entry or cgroup rule is added.
+ if deviceExists {
+ continue
+ }
+
+ // inform liblxc about the mount
+ err = lxcSetConfigItem(c.c, "lxc.mount.entry",
+ fmt.Sprintf("%s %s none bind,create=file 0 0",
+ shared.EscapePathFstab(devPath),
+ shared.EscapePathFstab(relDestPath)))
+ if err != nil {
+ return err
+ }
+ // The cgroup allow rule is only added for privileged containers, outside a user namespace, with the devices controller.
+ if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+ // Add the new device cgroup rule
+ dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
+ if err != nil {
+ return err
+ }
+
+ err = lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor))
+ if err != nil {
+ return fmt.Errorf("Failed to add cgroup rule for device") // NOTE(review): the underlying err is dropped here
+ }
+ }
+ }
+
+ return nil
+}
+// addInfinibandDevicesPerFun handles ifDev.PerFunDevices: with inject each device is inserted, otherwise a liblxc mount entry is added, the device is created and, where the cgroup condition holds, an allow rule is set.
+func (c *containerLXC) addInfinibandDevicesPerFun(deviceName string, ifDev *IBF, inject bool) error {
+ for _, unixCharDev := range ifDev.PerFunDevices {
+ destPath := fmt.Sprintf("/dev/infiniband/%s", unixCharDev)
+ uniqueDevPrefix := fmt.Sprintf("infiniband.unix.%s", deviceName)
+ relativeDestPath := fmt.Sprintf("dev/infiniband/%s", unixCharDev)
+ uniqueDevName := fmt.Sprintf("%s.%s", uniqueDevPrefix, strings.Replace(relativeDestPath, "/", "-", -1))
+ hostDevPath := filepath.Join(c.DevicesPath(), uniqueDevName)
+
+ dummyDevice := config.Device{
+ "source": destPath,
+ }
+ // Unlike the per-port variant, there is no check for an existing entry before inserting.
+ if inject {
+ err := c.insertUnixDevice(uniqueDevPrefix, dummyDevice, false)
+ if err != nil {
+ return err
+ }
+ continue
+ }
+
+ // inform liblxc about the mount
+ err := lxcSetConfigItem(c.c, "lxc.mount.entry", fmt.Sprintf("%s %s none bind,create=file 0 0", hostDevPath, relativeDestPath))
+ if err != nil {
+ return err
+ }
+ // NOTE(review): the mount entry above is set before the device is created and, unlike addInfinibandDevicesPerPort, its paths are not passed through shared.EscapePathFstab.
+ paths, err := c.createUnixDevice(uniqueDevPrefix, dummyDevice, false)
+ if err != nil {
+ return err
+ }
+ devPath := paths[0]
+ if c.isCurrentlyPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+ // Add the new device cgroup rule
+ dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
+ if err != nil {
+ return err
+ }
+
+ err = lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor))
+ if err != nil {
+ return fmt.Errorf("Failed to add cgroup rule for device") // NOTE(review): the underlying err is dropped here
+ }
+ }
+ }
+
+ return nil
+}
+// addInfinibandDevices lists the container's devices directory (a missing directory is tolerated) and then adds the per-port and per-function devices of ifDev, passing inject through to both.
+func (c *containerLXC) addInfinibandDevices(deviceName string, ifDev *IBF, inject bool) error {
+ // load all devices
+ dents, err := ioutil.ReadDir(c.DevicesPath())
+ if err != nil {
+ if !os.IsNotExist(err) {
+ return err
+ }
+ }
+ // The directory listing is only used by the per-port step, to detect entries that already map to the same path.
+ err = c.addInfinibandDevicesPerPort(deviceName, ifDev, dents, inject)
+ if err != nil {
+ return err
+ }
+
+ return c.addInfinibandDevicesPerFun(deviceName, ifDev, inject)
+}
+// addInfinibandDevice adds infiniband device deviceName with inject=true, attaches its interface to the container via AttachInterface and stores that interface in volatile.<deviceName>.host_name.
+func (c *containerLXC) addInfinibandDevice(deviceName string, device config.Device) error {
+ device, err := c.fillNetworkDevice(deviceName, device)
+ if err != nil {
+ return err
+ }
+ // NOTE(review): if type is not "infiniband" the map stays nil and the lookup below reports the device as not found.
+ var infiniband map[string]IBF
+ if device["type"] == "infiniband" {
+ infiniband, err = deviceLoadInfiniband()
+ if err != nil {
+ return err
+ }
+ }
+ // Collect the interfaces already claimed so that an sriov device can be given a free VF below.
+ reservedDevices, err := c.getInfinibandReserved(device)
+ if err != nil {
+ return err
+ }
+
+ device, err = c.fillInfinibandSriovNetworkDevice(deviceName, device, reservedDevices)
+ if err != nil {
+ return err
+ }
+ // Look up the infiniband info by the interface actually used: host_name for sriov, the parent otherwise.
+ key := device["parent"]
+ if device["nictype"] == "sriov" {
+ key = device["host_name"]
+ }
+
+ ifDev, ok := infiniband[key]
+ if !ok {
+ return fmt.Errorf("Specified infiniband device \"%s\" not found", key)
+ }
+
+ err = c.addInfinibandDevices(deviceName, &ifDev, true)
+ if err != nil {
+ return err
+ }
+
+ // Add the interface to the container.
+ err = c.c.AttachInterface(key, device["name"])
+ if err != nil {
+ return fmt.Errorf("Failed to attach interface: %s to %s: %s", key, device["name"], err)
+ }
+
+ // It is important to save this to the DB now so that devices starting next can see that we reserved this
+ // host_name device.
+ configKey := fmt.Sprintf("volatile.%s.host_name", deviceName)
+ err = c.VolatileSet(map[string]string{configKey: key})
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+// removeInfinibandDevice removes the unix device entries of deviceName (eject=true only for paths no other infiniband entry maps to), detaches the interface if a volatile host_name is set and clears that key.
+func (c *containerLXC) removeInfinibandDevice(deviceName string, device config.Device) error {
+ device, err := c.fillNetworkDevice(deviceName, device)
+ if err != nil {
+ return err
+ }
+
+ // load all devices
+ dents, err := ioutil.ReadDir(c.DevicesPath())
+ if err != nil {
+ if !os.IsNotExist(err) {
+ return err
+ }
+ }
+ // Split the infiniband.unix.* entries into ours (matching ourPrefix) and everyone else's (tmp).
+ tmp := []string{}
+ ourInfinibandDevs := []string{}
+ prefix := fmt.Sprintf("infiniband.unix.")
+ ourPrefix := fmt.Sprintf("infiniband.unix.%s.", deviceName)
+ for _, ent := range dents {
+ // skip non infiniband.unix.<device-name> devices
+ devName := ent.Name()
+ if !strings.HasPrefix(devName, prefix) {
+ continue
+ }
+
+ // this is our infiniband device
+ if strings.HasPrefix(devName, ourPrefix) {
+ ourInfinibandDevs = append(ourInfinibandDevs, devName)
+ continue
+ }
+
+ // this is someone else's infiniband device
+ tmp = append(tmp, devName)
+ }
+ // Turn the other devices' entry names back into absolute container paths.
+ residualInfinibandDevs := []string{}
+ for _, peerDevName := range tmp {
+ idx := strings.LastIndex(peerDevName, ".")
+ if idx == -1 {
+ return fmt.Errorf("Invalid infiniband device name \"%s\"", peerDevName)
+ }
+ rPeerPath := peerDevName[idx+1:]
+ rPeerPath = strings.Replace(rPeerPath, "-", "/", -1)
+ absPeerPath := fmt.Sprintf("/%s", rPeerPath)
+ residualInfinibandDevs = append(residualInfinibandDevs, absPeerPath)
+ }
+ // Remove each of our entries, passing eject=false when another device still maps to the same path.
+ ourName := fmt.Sprintf("infiniband.unix.%s", deviceName)
+ for _, devName := range ourInfinibandDevs {
+ idx := strings.LastIndex(devName, ".")
+ if idx == -1 {
+ return fmt.Errorf("Invalid infiniband device name \"%s\"", devName)
+ }
+ rPath := devName[idx+1:]
+ rPath = strings.Replace(rPath, "-", "/", -1)
+ absPath := fmt.Sprintf("/%s", rPath)
+
+ dummyDevice := config.Device{
+ "path": absPath,
+ }
+ // No other infiniband entries exist, so always eject.
+ if len(residualInfinibandDevs) == 0 {
+ err := c.removeUnixDevice(ourName, dummyDevice, true)
+ if err != nil {
+ return err
+ }
+ continue
+ }
+ // Eject only if none of the other entries maps to this path.
+ eject := true
+ for _, peerDevPath := range residualInfinibandDevs {
+ if peerDevPath == absPath {
+ eject = false
+ break
+ }
+ }
+
+ err := c.removeUnixDevice(ourName, dummyDevice, eject)
+ if err != nil {
+ return err
+ }
+ }
+
+ // Remove the interface from the container.
+ hostName := c.localConfig[fmt.Sprintf("volatile.%s.host_name", deviceName)]
+ if hostName != "" {
+ err = c.c.DetachInterfaceRename(device["name"], hostName)
+ if err != nil {
+ return errors.Wrapf(err, "Failed to detach interface: %s to %s", device["name"], hostName)
+ }
+ }
+
+ err = c.clearInfinibandVolatile(deviceName)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+// clearInfinibandVolatile sets volatile.<deviceName>.host_name to the empty string via VolatileSet and returns any error from that call.
+func (c *containerLXC) clearInfinibandVolatile(deviceName string) error {
+ configKey := fmt.Sprintf("volatile.%s.host_name", deviceName)
+ err := c.VolatileSet(map[string]string{configKey: ""})
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
From c72f8622ce6f8b59e9703283f13e9adb5a53b613 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 25 Jul 2019 12:17:51 +0100
Subject: [PATCH 32/32] device/utils: Moves DevicesPath() to device package
Updates usage in container_lxc*.go
This is so the proxy device can use this function in the future.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container_lxc.go | 57 +++++++++++++----------------
lxd/container_lxc_infiniband.go | 14 +++----
lxd/device/device_utils_instance.go | 7 ++++
3 files changed, 40 insertions(+), 38 deletions(-)
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 5bb2d95e2d..61f4c9748b 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1655,7 +1655,7 @@ func (c *containerLXC) initLXC(config bool) error {
}
relativeDestPath := strings.TrimPrefix(destPath, "/")
- sourceDevPath := filepath.Join(c.DevicesPath(), fmt.Sprintf("unix.%s.%s", strings.Replace(k, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1)))
+ sourceDevPath := filepath.Join(device.DevicesPath(c), fmt.Sprintf("unix.%s.%s", strings.Replace(k, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1)))
// Don't add mount entry for devices that don't yet exist
if m["required"] != "" && !shared.IsTrue(m["required"]) && srcPath != "" && !shared.PathExists(srcPath) {
@@ -1681,7 +1681,7 @@ func (c *containerLXC) initLXC(config bool) error {
destPath := m["path"]
relativeDestPath := strings.TrimPrefix(destPath, "/")
- sourceDevPath := filepath.Join(c.DevicesPath(), fmt.Sprintf("disk.%s.%s", strings.Replace(k, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1)))
+ sourceDevPath := filepath.Join(device.DevicesPath(c), fmt.Sprintf("disk.%s.%s", strings.Replace(k, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1)))
// Various option checks
isOptional := shared.IsTrue(m["optional"])
@@ -2528,7 +2528,7 @@ func (c *containerLXC) startCommon() (string, error) {
return "", err
}
- err = os.MkdirAll(c.DevicesPath(), 0711)
+ err = os.MkdirAll(device.DevicesPath(c), 0711)
if err != nil {
return "", err
}
@@ -3723,7 +3723,7 @@ func (c *containerLXC) cleanup() {
SeccompDeleteProfile(c)
// Remove the devices path
- os.Remove(c.DevicesPath())
+ os.Remove(device.DevicesPath(c))
// Remove the shmounts path
os.RemoveAll(c.ShmountsPath())
@@ -6822,7 +6822,7 @@ func (c *containerLXC) removeMount(mount string) error {
func (c *containerLXC) deviceExistsInDevicesFolder(prefix string, path string) bool {
relativeDestPath := strings.TrimPrefix(path, "/")
devName := fmt.Sprintf("%s.%s", strings.Replace(prefix, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1))
- devPath := filepath.Join(c.DevicesPath(), devName)
+ devPath := filepath.Join(device.DevicesPath(c), devName)
return shared.PathExists(devPath)
}
@@ -6912,8 +6912,8 @@ func (c *containerLXC) createUnixDevice(prefix string, m config.Device, defaultM
}
// Create the devices directory if missing
- if !shared.PathExists(c.DevicesPath()) {
- os.Mkdir(c.DevicesPath(), 0711)
+ if !shared.PathExists(device.DevicesPath(c)) {
+ os.Mkdir(device.DevicesPath(c), 0711)
if err != nil {
return nil, fmt.Errorf("Failed to create devices path: %s", err)
}
@@ -6925,7 +6925,7 @@ func (c *containerLXC) createUnixDevice(prefix string, m config.Device, defaultM
}
relativeDestPath := strings.TrimPrefix(destPath, "/")
devName := fmt.Sprintf("%s.%s", strings.Replace(prefix, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1))
- devPath := filepath.Join(c.DevicesPath(), devName)
+ devPath := filepath.Join(device.DevicesPath(c), devName)
// Create the new entry
if !c.state.OS.RunningInUserNS {
@@ -7134,7 +7134,7 @@ func (c *containerLXC) removeUnixDevice(prefix string, m config.Device, eject bo
}
relativeDestPath := strings.TrimPrefix(destPath, "/")
devName := fmt.Sprintf("%s.%s", strings.Replace(prefix, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1))
- devPath := filepath.Join(c.DevicesPath(), devName)
+ devPath := filepath.Join(device.DevicesPath(c), devName)
if dType == "" || dMajor < 0 || dMinor < 0 {
dType, dMajor, dMinor, err = deviceGetAttributes(devPath)
@@ -7203,12 +7203,12 @@ func (c *containerLXC) removeUnixDeviceNum(prefix string, m config.Device, major
func (c *containerLXC) removeUnixDevices() error {
// Check that we indeed have devices to remove
- if !shared.PathExists(c.DevicesPath()) {
+ if !shared.PathExists(device.DevicesPath(c)) {
return nil
}
// Load the directory listing
- dents, err := ioutil.ReadDir(c.DevicesPath())
+ dents, err := ioutil.ReadDir(device.DevicesPath(c))
if err != nil {
return err
}
@@ -7221,7 +7221,7 @@ func (c *containerLXC) removeUnixDevices() error {
}
// Remove the entry
- devicePath := filepath.Join(c.DevicesPath(), f.Name())
+ devicePath := filepath.Join(device.DevicesPath(c), f.Name())
err := os.Remove(devicePath)
if err != nil {
logger.Error("Failed removing unix device", log.Ctx{"err": err, "path": devicePath})
@@ -7246,7 +7246,7 @@ func (c *containerLXC) insertProxyDevice(devName string, m config.Device) error
}
devFileName := fmt.Sprintf("proxy.%s", devName)
- pidPath := filepath.Join(c.DevicesPath(), devFileName)
+ pidPath := filepath.Join(device.DevicesPath(c), devFileName)
logFileName := fmt.Sprintf("proxy.%s.log", devName)
logPath := filepath.Join(c.LogPath(), logFileName)
@@ -7399,7 +7399,7 @@ func (c *containerLXC) removeProxyDevice(devName string) error {
iptables.ContainerClear("ipv6", fmt.Sprintf("%s (%s)", c.Name(), devName), "nat")
devFileName := fmt.Sprintf("proxy.%s", devName)
- devPath := filepath.Join(c.DevicesPath(), devFileName)
+ devPath := filepath.Join(device.DevicesPath(c), devFileName)
if !shared.PathExists(devPath) {
// There's no proxy process if NAT is enabled
@@ -7420,12 +7420,12 @@ func (c *containerLXC) removeProxyDevices() error {
iptables.ContainerClear("ipv6", fmt.Sprintf("%s", c.Name()), "nat")
// Check that we actually have devices to remove
- if !shared.PathExists(c.DevicesPath()) {
+ if !shared.PathExists(device.DevicesPath(c)) {
return nil
}
// Load the directory listing
- devFiles, err := ioutil.ReadDir(c.DevicesPath())
+ devFiles, err := ioutil.ReadDir(device.DevicesPath(c))
if err != nil {
return err
}
@@ -7437,7 +7437,7 @@ func (c *containerLXC) removeProxyDevices() error {
}
// Kill the process
- devicePath := filepath.Join(c.DevicesPath(), f.Name())
+ devicePath := filepath.Join(device.DevicesPath(c), f.Name())
err = killProxyProc(devicePath)
if err != nil {
logger.Error("Failed removing proxy device", log.Ctx{"err": err, "path": devicePath})
@@ -7453,7 +7453,7 @@ func (c *containerLXC) updateProxyDevice(devName string, m config.Device) error
}
devFileName := fmt.Sprintf("proxy.%s", devName)
- pidPath := filepath.Join(c.DevicesPath(), devFileName)
+ pidPath := filepath.Join(device.DevicesPath(c), devFileName)
err := killProxyProc(pidPath)
if err != nil {
return fmt.Errorf("Error occurred when removing old proxy device: %v", err)
@@ -7644,7 +7644,7 @@ func (c *containerLXC) createDiskDevice(name string, m config.Device) (string, e
// source paths
relativeDestPath := strings.TrimPrefix(m["path"], "/")
devName := fmt.Sprintf("disk.%s.%s", strings.Replace(name, "/", "-", -1), strings.Replace(relativeDestPath, "/", "-", -1))
- devPath := filepath.Join(c.DevicesPath(), devName)
+ devPath := filepath.Join(device.DevicesPath(c), devName)
srcPath := shared.HostPath(m["source"])
// Check if read-only
@@ -7728,8 +7728,8 @@ func (c *containerLXC) createDiskDevice(name string, m config.Device) (string, e
}
// Create the devices directory if missing
- if !shared.PathExists(c.DevicesPath()) {
- err := os.Mkdir(c.DevicesPath(), 0711)
+ if !shared.PathExists(device.DevicesPath(c)) {
+ err := os.Mkdir(device.DevicesPath(c), 0711)
if err != nil {
return "", err
}
@@ -7851,7 +7851,7 @@ func (c *containerLXC) removeDiskDevice(name string, m config.Device) error {
// Figure out the paths
destPath := strings.TrimPrefix(m["path"], "/")
devName := fmt.Sprintf("disk.%s.%s", strings.Replace(name, "/", "-", -1), strings.Replace(destPath, "/", "-", -1))
- devPath := filepath.Join(c.DevicesPath(), devName)
+ devPath := filepath.Join(device.DevicesPath(c), devName)
// The disk device doesn't exist.
if !shared.PathExists(devPath) {
@@ -7894,12 +7894,12 @@ func (c *containerLXC) removeDiskDevice(name string, m config.Device) error {
func (c *containerLXC) removeDiskDevices() error {
// Check that we indeed have devices to remove
- if !shared.PathExists(c.DevicesPath()) {
+ if !shared.PathExists(device.DevicesPath(c)) {
return nil
}
// Load the directory listing
- dents, err := ioutil.ReadDir(c.DevicesPath())
+ dents, err := ioutil.ReadDir(device.DevicesPath(c))
if err != nil {
return err
}
@@ -7912,10 +7912,10 @@ func (c *containerLXC) removeDiskDevices() error {
}
// Always try to unmount the host side
- _ = unix.Unmount(filepath.Join(c.DevicesPath(), f.Name()), unix.MNT_DETACH)
+ _ = unix.Unmount(filepath.Join(device.DevicesPath(c), f.Name()), unix.MNT_DETACH)
// Remove the entry
- diskPath := filepath.Join(c.DevicesPath(), f.Name())
+ diskPath := filepath.Join(device.DevicesPath(c), f.Name())
err := os.Remove(diskPath)
if err != nil {
logger.Error("Failed to remove disk device path", log.Ctx{"err": err, "path": diskPath})
@@ -8277,11 +8277,6 @@ func (c *containerLXC) Path() string {
return containerPath(name, c.IsSnapshot())
}
-func (c *containerLXC) DevicesPath() string {
- name := project.Prefix(c.Project(), c.Name())
- return shared.VarPath("devices", name)
-}
-
func (c *containerLXC) ShmountsPath() string {
name := project.Prefix(c.Project(), c.Name())
return shared.VarPath("shmounts", name)
diff --git a/lxd/container_lxc_infiniband.go b/lxd/container_lxc_infiniband.go
index 908038aa3b..5c8c7416f1 100644
--- a/lxd/container_lxc_infiniband.go
+++ b/lxd/container_lxc_infiniband.go
@@ -324,7 +324,7 @@ func (c *containerLXC) addInfinibandDevicesPerFun(deviceName string, ifDev *IBF,
uniqueDevPrefix := fmt.Sprintf("infiniband.unix.%s", deviceName)
relativeDestPath := fmt.Sprintf("dev/infiniband/%s", unixCharDev)
uniqueDevName := fmt.Sprintf("%s.%s", uniqueDevPrefix, strings.Replace(relativeDestPath, "/", "-", -1))
- hostDevPath := filepath.Join(c.DevicesPath(), uniqueDevName)
+ hostDevPath := filepath.Join(device.DevicesPath(c), uniqueDevName)
dummyDevice := config.Device{
"source": destPath,
@@ -368,7 +368,7 @@ func (c *containerLXC) addInfinibandDevicesPerFun(deviceName string, ifDev *IBF,
func (c *containerLXC) addInfinibandDevices(deviceName string, ifDev *IBF, inject bool) error {
// load all devices
- dents, err := ioutil.ReadDir(c.DevicesPath())
+ dents, err := ioutil.ReadDir(device.DevicesPath(c))
if err != nil {
if !os.IsNotExist(err) {
return err
@@ -439,14 +439,14 @@ func (c *containerLXC) addInfinibandDevice(deviceName string, device config.Devi
return nil
}
-func (c *containerLXC) removeInfinibandDevice(deviceName string, device config.Device) error {
- device, err := c.fillNetworkDevice(deviceName, device)
+func (c *containerLXC) removeInfinibandDevice(deviceName string, m config.Device) error {
+ m, err := c.fillNetworkDevice(deviceName, m)
if err != nil {
return err
}
// load all devices
- dents, err := ioutil.ReadDir(c.DevicesPath())
+ dents, err := ioutil.ReadDir(device.DevicesPath(c))
if err != nil {
if !os.IsNotExist(err) {
return err
@@ -525,9 +525,9 @@ func (c *containerLXC) removeInfinibandDevice(deviceName string, device config.D
// Remove the interface from the container.
hostName := c.localConfig[fmt.Sprintf("volatile.%s.host_name", deviceName)]
if hostName != "" {
- err = c.c.DetachInterfaceRename(device["name"], hostName)
+ err = c.c.DetachInterfaceRename(m["name"], hostName)
if err != nil {
- return errors.Wrapf(err, "Failed to detach interface: %s to %s", device["name"], hostName)
+ return errors.Wrapf(err, "Failed to detach interface: %s to %s", m["name"], hostName)
}
}
diff --git a/lxd/device/device_utils_instance.go b/lxd/device/device_utils_instance.go
index 52226afb4f..28099a338e 100644
--- a/lxd/device/device_utils_instance.go
+++ b/lxd/device/device_utils_instance.go
@@ -1,8 +1,15 @@
package device
import (
+ "github.com/lxc/lxd/lxd/project"
"github.com/lxc/lxd/lxd/state"
+ "github.com/lxc/lxd/shared"
)
// InstanceLoadNodeAll returns all local instance configs.
var InstanceLoadNodeAll func(s *state.State) ([]InstanceIdentifier, error)
+
+// DevicesPath returns the path where devices are created for an instance.
+func DevicesPath(instance InstanceIdentifier) string {
+ return shared.VarPath("devices", project.Prefix(instance.Project(), instance.Name()))
+}
More information about the lxc-devel
mailing list