batchaddmedia: Simplify the Python 2 & 3 unicode handling.
To avoid issues with quoted fields, which the old comma-splitting parser couldn't handle, I've replaced the manual CSV handling with csv.DictReader and simplified the unicode handling down to a single line (decoding keys and values on Python 2). I don't believe any special encoding is required when writing to the database. Tested by importing a CSV containing braille characters on both Python 2 and Python 3.
commit 8f18381bbc
parent eb36543364
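
In short, the new approach boils down to this minimal sketch (paraphrased from the diff below; the filename and the final print are illustrative, not from the commit):

    from __future__ import print_function

    import csv

    import six

    all_metadata = open('metadata.csv', 'r')       # illustrative filename
    media_metadata = csv.DictReader(all_metadata)  # handles quoted fields

    for index, file_metadata in enumerate(media_metadata):
        if six.PY2:
            # Python 2's csv module yields byte strings; decode keys and
            # values to unicode in one pass, the "single line" of handling.
            file_metadata = {k.decode('utf-8'): v.decode('utf-8')
                             for k, v in file_metadata.items()}
        media_id = file_metadata.get('id') or index
        print(media_id, file_metadata)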
@@ -16,16 +16,13 @@
 from __future__ import print_function, unicode_literals
 
-import codecs
 import csv
 import os
 import shutil
-import sys
 import tempfile
 
 import requests
 import six
 
 from six.moves.urllib.parse import urlparse
 
 from mediagoblin.db.models import LocalUser
@@ -87,19 +84,13 @@ def batchaddmedia(args):
     abs_metadata_filename = os.path.abspath(metadata_path)
     abs_metadata_dir = os.path.dirname(abs_metadata_filename)
 
-    def maybe_unicodeify(some_string):
-        # this is kinda terrible
-        if some_string is None:
-            return None
-        else:
-            return six.text_type(some_string)
+    all_metadata = open(abs_metadata_filename, 'r')
+    media_metadata = csv.DictReader(all_metadata)
 
-    with codecs.open(
-            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
-        contents = all_metadata.read()
-        media_metadata = parse_csv_file(contents)
-
-    for media_id, file_metadata in media_metadata.items():
+    for index, file_metadata in enumerate(media_metadata):
+        if six.PY2:
+            file_metadata = {k.decode('utf-8'): v.decode('utf-8') for k, v in file_metadata.items()}
+
         files_attempted += 1
         # In case the metadata was not uploaded initialize an empty dictionary.
         json_ld_metadata = compact_and_validate({})
@@ -119,6 +110,7 @@ def batchaddmedia(args):
         try:
             json_ld_metadata = compact_and_validate(file_metadata)
         except ValidationError as exc:
+            media_id = file_metadata.get('id') or index
             error = _("""Error with media '{media_id}' value '{error_path}': {error_msg}
 Metadata was not uploaded.""".format(
                 media_id=media_id,
@@ -145,6 +137,8 @@ Metadata was not uploaded.""".format(
             # `batchaddmedia` to upload a file larger than 200MB.
             media_file = tempfile.TemporaryFile()
             shutil.copyfileobj(res.raw, media_file)
+            if six.PY2:
+                media_file.seek(0)
 
         elif url.scheme == '':
             path = url.path
@@ -166,10 +160,10 @@ FAIL: Local file {filename} could not be accessed.
             user=user,
             submitted_file=media_file,
             filename=filename,
-            title=maybe_unicodeify(title),
-            description=maybe_unicodeify(description),
-            collection_slug=maybe_unicodeify(collection_slug),
-            license=maybe_unicodeify(license),
+            title=title,
+            description=description,
+            collection_slug=collection_slug,
+            license=license,
             metadata=json_ld_metadata,
             tags_string="")
         print(_("""Successfully submitted {filename}!
@@ -190,44 +184,3 @@ uploaded successfully.""".format(filename=filename)))
         "{files_uploaded} out of {files_attempted} files successfully submitted".format(
             files_uploaded=files_uploaded,
             files_attempted=files_attempted)))
-
-
-def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
-    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
-    # TODO: this probably won't be necessary in Python 3
-    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
-                            dialect=dialect, **kwargs)
-    for row in csv_reader:
-        # decode UTF-8 back to Unicode, cell by cell:
-        yield [six.text_type(cell, 'utf-8') for cell in row]
-
-
-def utf_8_encoder(unicode_csv_data):
-    for line in unicode_csv_data:
-        yield line.encode('utf-8')
-
-
-def parse_csv_file(file_contents):
-    """
-    The helper function which converts the csv file into a dictionary where each
-    item's key is the provided value 'id' and each item's value is another
-    dictionary.
-    """
-    list_of_contents = file_contents.split('\n')
-    key, lines = (list_of_contents[0].split(','),
-                  list_of_contents[1:])
-    objects_dict = {}
-
-    # Build a dictionary
-    for index, line in enumerate(lines):
-        if line.isspace() or line == '': continue
-        if (sys.version_info[0] == 3):
-            # Python 3's csv.py supports Unicode out of the box.
-            reader = csv.reader([line])
-        else:
-            reader = unicode_csv_reader([line])
-        values = next(reader)
-        line_dict = dict([(key[i], val)
-                          for i, val in enumerate(values)])
-        media_id = line_dict.get('id') or index
-        objects_dict[media_id] = (line_dict)
-
-    return objects_dict
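
Why the decode only happens on Python 2: that csv module reads and returns byte strings, while Python 3's works in text. A self-contained sketch of the difference, using braille characters like those used in testing (the sample rows here are made up):

    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals

    import csv
    import io

    import six

    if six.PY2:
        # Python 2: csv consumes and produces byte strings.
        data = io.BytesIO(b'id,title\n1,\xe2\xa0\x93\xe2\xa0\x91\n')  # UTF-8 braille
    else:
        # Python 3: csv consumes and produces text.
        data = io.StringIO('id,title\n1,\u2813\u2811\n')

    for row in csv.DictReader(data):
        if six.PY2:
            row = {k.decode('utf-8'): v.decode('utf-8') for k, v in row.items()}
        # Either way, we end up with unicode text.
        assert all(isinstance(v, six.text_type) for v in row.values())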