Make copying to/from storage systems memory efficient (#419)

The copy_locally and copy_local_to_storage methods (very inconsistent names, by the way)
were simply slurping everything into RAM and writing it back out in one go.
(copy_locally was actually memory efficient already when the storage system was local.)

Use shutil.copyfileobj, which does chunked reads/writes on file objects.
Its default buffer size is 16 KB, and since each chunk means a separate HTTP
request for e.g. cloudfiles, we use a chunk size of 4 MB here (a value I
picked arbitrarily, without benchmarking).
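
For illustration, a minimal, self-contained sketch of the same chunked copy on
plain local files; the CHUNK_SIZE name and the file paths are placeholders for
this sketch, and the real methods get their file objects from the storage
backend via get_file():

import shutil

# 4 MB per read/write; on remote backends such as cloudfiles each chunk
# becomes one HTTP request, so larger chunks mean fewer round trips.
CHUNK_SIZE = 4 * 1048576

# 'source.bin' and 'dest.bin' are placeholder paths for this sketch.
with open('source.bin', 'rb') as source_file:
    with open('dest.bin', 'wb') as dest_file:
        shutil.copyfileobj(source_file, dest_file, length=CHUNK_SIZE)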

This should help with the failure to upload large files (issue #419).
Sebastian Spaeth 2012-12-19 11:06:51 +01:00 committed by Christopher Allan Webber
parent 3ff4f75203
commit 99a54c0095
2 changed files with 8 additions and 7 deletions


@@ -160,12 +160,13 @@ class StorageInterface(object):
         appropriate.
         """
         if self.local_storage:
-            shutil.copy(
-                self.get_local_path(filepath), dest_path)
+            # Note: this will copy in small chunks
+            shutil.copy(self.get_local_path(filepath), dest_path)
         else:
             with self.get_file(filepath, 'rb') as source_file:
                 with file(dest_path, 'wb') as dest_file:
-                    dest_file.write(source_file.read())
+                    # Copy from remote storage in 4M chunks
+                    shutil.copyfileobj(source_file, dest_file, length=4*1048576)
 
     def copy_local_to_storage(self, filename, filepath):
         """
@@ -177,7 +178,8 @@
         """
         with self.get_file(filepath, 'wb') as dest_file:
             with file(filename, 'rb') as source_file:
-                dest_file.write(source_file.read())
+                # Copy to storage system in 4M chunks
+                shutil.copyfileobj(source_file, dest_file, length=4*1048576)
 
     ###########


@@ -87,6 +87,5 @@ class BasicFileStorage(StorageInterface):
             directory = self._resolve_filepath(filepath[:-1])
             if not os.path.exists(directory):
                 os.makedirs(directory)
-
-        shutil.copy(
-            filename, self.get_local_path(filepath))
+        # This uses chunked copying of 16kb buffers (Py2.7):
+        shutil.copy(filename, self.get_local_path(filepath))
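
For reference, a rough sketch of what that last comment alludes to: on CPython 2.7,
shutil.copy() copies the file contents with shutil.copyfileobj() using its default
16 KB buffer, then copies the permission bits. The copy_like_shutil name is made up
for this sketch, and it ignores shutil.copy()'s handling of a directory destination:

import shutil

# Approximation of what shutil.copy() does on CPython 2.7: a chunked
# content copy (16 KB buffer by default) followed by copying permission bits.
# Ignores the case where dst is a directory, which shutil.copy() supports.
def copy_like_shutil(src, dst, length=16 * 1024):
    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            shutil.copyfileobj(fsrc, fdst, length)
    shutil.copymode(src, dst)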