Fix URL-based importing with batchaddmedia command.

The command was raising "io.UnsupportedOperation: seek" for URL-based imports
and didn't support HTTPS.
This commit is contained in:
Ben Sturmfels 2019-09-12 14:47:45 +10:00
parent 9618899480
commit 88ecf6a340
No known key found for this signature in database
GPG Key ID: 023C05E2C9C068F0

View File

@ -19,7 +19,9 @@ from __future__ import print_function
import codecs import codecs
import csv import csv
import os import os
import shutil
import sys import sys
import tempfile
import requests import requests
import six import six
@ -128,9 +130,21 @@ Metadata was not uploaded.""".format(
url = urlparse(original_location) url = urlparse(original_location)
filename = url.path.split()[-1] filename = url.path.split()[-1]
if url.scheme == 'http': if url.scheme.startswith('http'):
res = requests.get(url.geturl(), stream=True) res = requests.get(url.geturl(), stream=True)
media_file = res.raw if res.headers.get('content-encoding'):
# The requests library's "raw" attribute does not deal with content
# encoding. Alternative could be to use iter_content(), and
# write chunks to the temporary file.
raise NotImplementedError('URL-based media with content-encoding (eg. gzip) are not currently supported.')
# To avoid loading the media into memory all at once, we write it to
# a file before importing. This currently requires free space up to
# twice the size of the media file. Memory use can be tested by
# running something like `ulimit -Sv 200000` before running
# `batchaddmedia` to upload a file larger than 200MB.
media_file = tempfile.TemporaryFile()
shutil.copyfileobj(res.raw, media_file)
elif url.scheme == '': elif url.scheme == '':
path = url.path path = url.path
@ -170,6 +184,8 @@ u"FAIL: This file is larger than the upload limits for this site."))
"FAIL: This file will put this user past their upload limits.")) "FAIL: This file will put this user past their upload limits."))
except UserPastUploadLimit: except UserPastUploadLimit:
print(_("FAIL: This user is already past their upload limits.")) print(_("FAIL: This user is already past their upload limits."))
finally:
media_file.close()
print(_( print(_(
"{files_uploaded} out of {files_attempted} files successfully submitted".format( "{files_uploaded} out of {files_attempted} files successfully submitted".format(
files_uploaded=files_uploaded, files_uploaded=files_uploaded,