[lxc-devel] [pylxd/master] Image import - memory hog fix

shurkaxaa on Github lxc-bot at linuxcontainers.org
Wed Sep 27 15:40:00 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 831 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20170927/372e5fa0/attachment.bin>
-------------- next part --------------
From 0e3ec068f05c57b4d6d37f9cda26a80b48857154 Mon Sep 17 00:00:00 2001
From: Alexander Kharkov <kharkovalexander at gmail.com>
Date: Wed, 20 Sep 2017 07:43:35 +0000
Subject: [PATCH 1/8] Add ability for stream-based image upload without loading
 images into memory. The external requests-toolbelt package is required because
 the multipart 'files' upload built into requests still loads parts into memory.

---
 pylxd/models/image.py | 19 ++++++++++++++++---
 requirements.txt      |  1 +
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/pylxd/models/image.py b/pylxd/models/image.py
index deaeeab..85dce56 100644
--- a/pylxd/models/image.py
+++ b/pylxd/models/image.py
@@ -11,10 +11,12 @@
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
+import collections
 import contextlib
 import tempfile
 import uuid
 import warnings
+from requests_toolbelt import MultipartEncoder
 
 import six
 
@@ -99,7 +101,8 @@ def all(cls, client):
 
     @classmethod
     def create(
-            cls, client, image_data, metadata=None, public=False, wait=True):
+            cls, client, image_data,
+            metadata=None, public=False, wait=True, from_streams=False):
         """Create an image.
 
         If metadata is provided, a multipart form data request is formed to
@@ -119,7 +122,16 @@ def create(
         if public:
             headers['X-LXD-Public'] = '1'
 
-        if metadata is not None:
+        if from_streams is not None:
+            # Image uploaded as streamed (metadata, rootfs) multipart message
+            # order is important metadata should be passed first
+            files = collections.OrderedDict(
+                metadata=('metadata', metadata, 'application/octet-stream'),
+                rootfs=('rootfs', image_data, 'application/octet-stream'))
+            data = MultipartEncoder(files)
+            headers.update({"Content-Type": data.content_type})
+        elif metadata is not None:
+            # in-memory file upload for clients backward compatibility
             boundary = str(uuid.uuid1())
 
             data = b''
@@ -142,7 +154,8 @@ def create(
         else:
             data = image_data
 
-        response = client.api.images.post(data=data, headers=headers)
+        response = client.api.images.post(
+            data=data, files=None, headers=headers)
         operation = client.operations.wait_for_operation(
             response.json()['operation'])
         return cls(client, fingerprint=operation.metadata['fingerprint'])
diff --git a/requirements.txt b/requirements.txt
index 257f051..1098cc9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,6 @@ six>=1.9.0
 ws4py!=0.3.5,>=0.3.4  # 0.3.5 is broken for websocket support
 requests!=2.8.0,!=2.12.0,!=2.12.1,>=2.5.2
 requests-unixsocket>=0.1.5
+requests-toolbelt>=0.8.0
 cryptography!=1.3.0,>=1.0
 pyOpenSSL>=0.14;python_version<='2.7.8'

From 2781158cc151c489d7d11afed67aa7d6e7ecbc54 Mon Sep 17 00:00:00 2001
From: Alexander Kharkov <kharkovalexander at gmail.com>
Date: Wed, 20 Sep 2017 07:43:35 +0000
Subject: [PATCH 2/8] Add ability for stream-based image upload without loading
 images into memory. The external requests-toolbelt package is required because
 the multipart 'files' upload built into requests still loads parts into memory.

---
 pylxd/models/image.py | 17 +++++++++++++++--
 requirements.txt      |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pylxd/models/image.py b/pylxd/models/image.py
index deaeeab..1991d96 100644
--- a/pylxd/models/image.py
+++ b/pylxd/models/image.py
@@ -11,10 +11,12 @@
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
+import collections
 import contextlib
 import tempfile
 import uuid
 import warnings
+from requests_toolbelt import MultipartEncoder
 
 import six
 
@@ -99,7 +101,8 @@ def all(cls, client):
 
     @classmethod
     def create(
-            cls, client, image_data, metadata=None, public=False, wait=True):
+            cls, client, image_data,
+            metadata=None, public=False, wait=True, from_streams=False):
         """Create an image.
 
         If metadata is provided, a multipart form data request is formed to
@@ -119,7 +122,17 @@ def create(
         if public:
             headers['X-LXD-Public'] = '1'
 
-        if metadata is not None:
+        if from_streams is not None:
+            # Image uploaded as chunked/stream (metadata, rootfs)
+            # multipart message.
+            # Order of parts is important metadata should be passed first
+            files = collections.OrderedDict(
+                metadata=('metadata', metadata, 'application/octet-stream'),
+                rootfs=('rootfs', image_data, 'application/octet-stream'))
+            data = MultipartEncoder(files)
+            headers.update({"Content-Type": data.content_type})
+        elif metadata is not None:
+            # in-memory file upload for clients backward compatibility
             boundary = str(uuid.uuid1())
 
             data = b''
diff --git a/requirements.txt b/requirements.txt
index 257f051..1098cc9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,6 @@ six>=1.9.0
 ws4py!=0.3.5,>=0.3.4  # 0.3.5 is broken for websocket support
 requests!=2.8.0,!=2.12.0,!=2.12.1,>=2.5.2
 requests-unixsocket>=0.1.5
+requests-toolbelt>=0.8.0
 cryptography!=1.3.0,>=1.0
 pyOpenSSL>=0.14;python_version<='2.7.8'

From 765e42182a5657fa0d04c2d5fce912d4a2926828 Mon Sep 17 00:00:00 2001
From: Alexander Kharkov <kharkovalexander at gmail.com>
Date: Thu, 21 Sep 2017 02:58:00 +0000
Subject: [PATCH 3/8] Remove unneeded changes

---
 pylxd/models/image.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pylxd/models/image.py b/pylxd/models/image.py
index 8e378e8..1991d96 100644
--- a/pylxd/models/image.py
+++ b/pylxd/models/image.py
@@ -155,8 +155,7 @@ def create(
         else:
             data = image_data
 
-        response = client.api.images.post(
-            data=data, files=None, headers=headers)
+        response = client.api.images.post(data=data, headers=headers)
         operation = client.operations.wait_for_operation(
             response.json()['operation'])
         return cls(client, fingerprint=operation.metadata['fingerprint'])

From 7d9d832d543cf6dd4837fd81e67e654247e6dd8f Mon Sep 17 00:00:00 2001
From: Alexander Kharkov <kharkovalexander at gmail.com>
Date: Wed, 20 Sep 2017 07:43:35 +0000
Subject: [PATCH 4/8] Add ability for stream-based image upload without loading
 images into memory. The external requests-toolbelt package is required because
 the multipart 'files' upload built into requests still loads parts into memory.

---
 pylxd/models/image.py | 19 ++++++++++++++++---
 requirements.txt      |  1 +
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/pylxd/models/image.py b/pylxd/models/image.py
index deaeeab..85dce56 100644
--- a/pylxd/models/image.py
+++ b/pylxd/models/image.py
@@ -11,10 +11,12 @@
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
+import collections
 import contextlib
 import tempfile
 import uuid
 import warnings
+from requests_toolbelt import MultipartEncoder
 
 import six
 
@@ -99,7 +101,8 @@ def all(cls, client):
 
     @classmethod
     def create(
-            cls, client, image_data, metadata=None, public=False, wait=True):
+            cls, client, image_data,
+            metadata=None, public=False, wait=True, from_streams=False):
         """Create an image.
 
         If metadata is provided, a multipart form data request is formed to
@@ -119,7 +122,16 @@ def create(
         if public:
             headers['X-LXD-Public'] = '1'
 
-        if metadata is not None:
+        if from_streams is not None:
+            # Image uploaded as streamed (metadata, rootfs) multipart message
+            # order is important metadata should be passed first
+            files = collections.OrderedDict(
+                metadata=('metadata', metadata, 'application/octet-stream'),
+                rootfs=('rootfs', image_data, 'application/octet-stream'))
+            data = MultipartEncoder(files)
+            headers.update({"Content-Type": data.content_type})
+        elif metadata is not None:
+            # in-memory file upload for clients backward compatibility
             boundary = str(uuid.uuid1())
 
             data = b''
@@ -142,7 +154,8 @@ def create(
         else:
             data = image_data
 
-        response = client.api.images.post(data=data, headers=headers)
+        response = client.api.images.post(
+            data=data, files=None, headers=headers)
         operation = client.operations.wait_for_operation(
             response.json()['operation'])
         return cls(client, fingerprint=operation.metadata['fingerprint'])
diff --git a/requirements.txt b/requirements.txt
index 257f051..1098cc9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,6 @@ six>=1.9.0
 ws4py!=0.3.5,>=0.3.4  # 0.3.5 is broken for websocket support
 requests!=2.8.0,!=2.12.0,!=2.12.1,>=2.5.2
 requests-unixsocket>=0.1.5
+requests-toolbelt>=0.8.0
 cryptography!=1.3.0,>=1.0
 pyOpenSSL>=0.14;python_version<='2.7.8'

From f2117b0e5e618aeb3639a3fe5eddc2637783766a Mon Sep 17 00:00:00 2001
From: Alexander Kharkov <kharkovalexander at gmail.com>
Date: Wed, 20 Sep 2017 07:43:35 +0000
Subject: [PATCH 5/8] Add ability for stream-based image upload without loading
 images into memory. The external requests-toolbelt package is required because
 the multipart 'files' upload built into requests still loads parts into memory.

---
 pylxd/models/image.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pylxd/models/image.py b/pylxd/models/image.py
index 85dce56..8e378e8 100644
--- a/pylxd/models/image.py
+++ b/pylxd/models/image.py
@@ -123,8 +123,9 @@ def create(
             headers['X-LXD-Public'] = '1'
 
         if from_streams is not None:
-            # Image uploaded as streamed (metadata, rootfs) multipart message
-            # order is important metadata should be passed first
+            # Image uploaded as chunked/stream (metadata, rootfs)
+            # multipart message.
+            # Order of parts is important metadata should be passed first
             files = collections.OrderedDict(
                 metadata=('metadata', metadata, 'application/octet-stream'),
                 rootfs=('rootfs', image_data, 'application/octet-stream'))

From 31dcd1bf5a009c7a542158c31d365a730ec454e2 Mon Sep 17 00:00:00 2001
From: Alexander Kharkov <kharkovalexander at gmail.com>
Date: Thu, 21 Sep 2017 02:58:00 +0000
Subject: [PATCH 6/8] Remove unneeded changes

---
 pylxd/models/image.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pylxd/models/image.py b/pylxd/models/image.py
index 8e378e8..1991d96 100644
--- a/pylxd/models/image.py
+++ b/pylxd/models/image.py
@@ -155,8 +155,7 @@ def create(
         else:
             data = image_data
 
-        response = client.api.images.post(
-            data=data, files=None, headers=headers)
+        response = client.api.images.post(data=data, headers=headers)
         operation = client.operations.wait_for_operation(
             response.json()['operation'])
         return cls(client, fingerprint=operation.metadata['fingerprint'])

From 84564312ad541a137e5fddb745b2db92aa5d657e Mon Sep 17 00:00:00 2001
From: Alexander Kharkov <kharkovalexander at gmail.com>
Date: Wed, 27 Sep 2017 13:26:19 +0000
Subject: [PATCH 7/8] Fix condition

---
 pylxd/models/image.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pylxd/models/image.py b/pylxd/models/image.py
index 1991d96..0850008 100644
--- a/pylxd/models/image.py
+++ b/pylxd/models/image.py
@@ -122,7 +122,7 @@ def create(
         if public:
             headers['X-LXD-Public'] = '1'
 
-        if from_streams is not None:
+        if from_streams:
             # Image uploaded as chunked/stream (metadata, rootfs)
             # multipart message.
             # Order of parts is important metadata should be passed first

From ad5385ba5f2d399bdbf5561186cc8b15d307279f Mon Sep 17 00:00:00 2001
From: Alexander Kharkov <kharkovalexander at gmail.com>
Date: Wed, 27 Sep 2017 13:33:58 +0000
Subject: [PATCH 8/8] Coverage for new create image chain

---
 pylxd/tests/models/test_image.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pylxd/tests/models/test_image.py b/pylxd/tests/models/test_image.py
index 7040a2d..e901564 100644
--- a/pylxd/tests/models/test_image.py
+++ b/pylxd/tests/models/test_image.py
@@ -1,6 +1,7 @@
 import hashlib
 import json
 
+from io import StringIO
 from pylxd import exceptions, models
 from pylxd.tests import testing
 
@@ -115,6 +116,16 @@ def test_create_with_metadata(self):
         self.assertIsInstance(a_image, models.Image)
         self.assertEqual(fingerprint, a_image.fingerprint)
 
+    def test_create_with_metadata_from_streams(self):
+        """An image with metadata is created."""
+        fingerprint = hashlib.sha256(b'').hexdigest()
+        a_image = models.Image.create(
+            self.client, StringIO(u''), metadata=StringIO(u''),
+            public=True, wait=True, from_streams=True)
+
+        self.assertIsInstance(a_image, models.Image)
+        self.assertEqual(fingerprint, a_image.fingerprint)
+
     def test_update(self):
         """An image is updated."""
         a_image = self.client.images.all()[0]


More information about the lxc-devel mailing list