The script now officially works! It works in many different situations, whether
the media to be uploaded is stored locally or on the web. I still have to clean
up the code and look for errors. I may also refactor some of this into a
function to be used with a GUI frontend in another project. Lastly, I need to
merge this with the metadata branch I've been working on, and convert the
metadata.csv information into the proper format for the new metadata column.
tilly-Q 2014-02-12 14:37:00 -05:00
parent 3214aeb238
commit 714c4cb7d7
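
For context, the reworked command expects a location.csv and a metadata.csv
inside the target directory or archive. The following is only an illustrative
sketch, with invented file names and values, inferred from the columns the code
reads ('media:id' and 'media:original' in location.csv; 'media:id' plus
dcterms:* fields in metadata.csv):

    location.csv:
        media:id,media:original
        1,images/turtle.jpg
        2,http://example.org/pics/ocean.jpg

    metadata.csv:
        media:id,dcterms:title,dcterms:license
        1,A Turtle,http://creativecommons.org/licenses/by-sa/3.0/
        2,The Ocean,http://creativecommons.org/licenses/by-sa/3.0/

Assuming the usual gmg entry point, an upload run might then look like:

    gmg batchaddmedia aturtle ./my_media          # target is a directory (default)
    gmg batchaddmedia -a aturtle ./my_media.tar   # target is an archive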

mediagoblin/gmg_commands/batchaddmedia.py

@@ -15,6 +15,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 import os
+import json, tempfile, urllib, tarfile, subprocess
+from csv import reader as csv_reader
+from urlparse import urlparse
+from pyld import jsonld

 from mediagoblin.gmg_commands import util as commands_util
 from mediagoblin.submit.lib import (
@@ -22,20 +26,26 @@ from mediagoblin.submit.lib import (
     FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
 from mediagoblin import mg_globals
-import json, csv


 def parser_setup(subparser):
     subparser.add_argument(
         'username',
         help="Name of user this media entry belongs to")
-    subparser.add_argument(
-        'locationfile',
+    target_type = subparser.add_mutually_exclusive_group()
+    target_type.add_argument('-d',
+        '--directory', action='store_const',
+        const='directory', dest='target_type',
+        default='directory', help=(
+            "Target is a directory"))
+    target_type.add_argument('-a',
+        '--archive', action='store_const',
+        const='archive', dest='target_type',
         help=(
-"Local file on filesystem with the address of all the files to be uploaded"))
+            "Target is an archive."))
     subparser.add_argument(
-        'metadatafile',
+        'target_path',
         help=(
-"Local file on filesystem with metadata of all the files to be uploaded"))
+"Path to a local archive or directory containing a location.csv and metadata.csv file"))
     subparser.add_argument(
         "-l", "--license",
         help=(
@@ -59,19 +69,36 @@ def batchaddmedia(args):
     if user is None:
         print "Sorry, no user by username '%s'" % args.username
         return

+    upload_limit, max_file_size = get_upload_file_limits(user)
+    temp_files = []
+
+    if args.target_type == 'archive':
+        dir_path = tempfile.mkdtemp()
+        temp_files.append(dir_path)
+        tar = tarfile.open(args.target_path)
+        tar.extractall(path=dir_path)
+
+    elif args.target_type == 'directory':
+        dir_path = args.target_path
+
+    location_file_path = "{dir_path}/location.csv".format(
+        dir_path=dir_path)
+    metadata_file_path = "{dir_path}/metadata.csv".format(
+        dir_path=dir_path)
+
     # check for the location file, if it exists...
-    location_filename = os.path.split(args.locationfile)[-1]
-    abs_location_filename = os.path.abspath(args.locationfile)
+    location_filename = os.path.split(location_file_path)[-1]
+    abs_location_filename = os.path.abspath(location_file_path)
     if not os.path.exists(abs_location_filename):
-        print "Can't find a file with filename '%s'" % args.locationfile
+        print "Can't find a file with filename '%s'" % location_file_path
         return

-    # check for the location file, if it exists...
-    metadata_filename = os.path.split(args.metadatafile)[-1]
-    abs_metadata_filename = os.path.abspath(args.metadatafile)
+    # check for the metadata file, if it exists...
+    metadata_filename = os.path.split(metadata_file_path)[-1]
+    abs_metadata_filename = os.path.abspath(metadata_file_path)
     if not os.path.exists(abs_metadata_filename):
-        print "Can't find a file with filename '%s'" % args.metadatafile
+        print "Can't find a file with filename '%s'" % metadata_file_path
         return

-    upload_limit, max_file_size = get_upload_file_limits(user)
@@ -91,20 +118,85 @@ def batchaddmedia(args):
     contents = all_metadata.read()
     media_metadata = parse_csv_file(contents)

     dcterms_context = { 'dcterms':'http://purl.org/dc/terms/' }

     for media_id in media_locations.keys():
         file_metadata = media_metadata[media_id]
         json_ld_metadata = jsonld.compact(file_metadata, dcterms_context)
         original_location = media_locations[media_id]['media:original']
         url = urlparse(original_location)
+
+        title = file_metadata.get('dcterms:title')
+        description = file_metadata.get('dcterms:description')
+        license = file_metadata.get('dcterms:license')
+        filename = url.path.split('/')[-1]
+
+        print "Working with {filename}".format(filename=filename)
+        if url.scheme == 'http':
+            print "Downloading {filename}...".format(
+                filename=filename)
+            media_file = tempfile.TemporaryFile()
+            res = urllib.urlopen(url.geturl())
+            media_file.write(res.read())
+            media_file.seek(0)
+
+        elif url.scheme == '':
+            path = url.path
+            if os.path.isabs(path):
+                file_abs_path = os.path.abspath(path)
+            else:
+                file_path = "{dir_path}/{local_path}".format(
+                    dir_path=dir_path,
+                    local_path=path)
+                file_abs_path = os.path.abspath(file_path)
+            try:
+                media_file = file(file_abs_path, 'r')
+            except IOError:
+                print "Local file {filename} could not be accessed.".format(
+                    filename=filename)
+                print "Skipping it."
+                continue
+
+        print "Submitting {filename}...".format(filename=filename)
+        try:
+            submit_media(
+                mg_app=app,
+                user=user,
+                submitted_file=media_file,
+                filename=filename,
+                title=maybe_unicodeify(title),
+                description=maybe_unicodeify(description),
+                license=maybe_unicodeify(license),
+                tags_string=u"",
+                upload_limit=upload_limit, max_file_size=max_file_size)
+            print "Successfully uploaded {filename}!".format(filename=filename)
+            print ""
+        except FileUploadLimit:
+            print "This file is larger than the upload limits for this site."
+        except UserUploadLimit:
+            print "This file will put this user past their upload limits."
+        except UserPastUploadLimit:
+            print "This user is already past their upload limits."
+
+    teardown(temp_files)


 def parse_csv_file(file_contents):
     list_of_contents = file_contents.split('\n')
     key, lines = (list_of_contents[0].split(','),
                   list_of_contents[1:])
-    list_of_objects = []
+    objects_dict = {}

     # Build a dictionary
     for line in lines:
         if line.isspace() or line == '': continue
-        values = csv.reader([line]).next()
-        new_dict = dict([(key[i], val)
+        values = csv_reader([line]).next()
+        line_dict = dict([(key[i], val)
                          for i, val in enumerate(values)])
-        list_of_objects.append(new_dict)
+        media_id = line_dict['media:id']
+        objects_dict[media_id] = (line_dict)

-    return list_of_objects
+    return objects_dict
+
+
+def teardown(temp_files):
+    for temp_file in temp_files:
+        subprocess.call(['rm','-r',temp_file])
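
A quick sketch of how the reworked parse_csv_file should behave (Python 2; the
input values are invented for illustration): each row becomes a dict keyed by
the header columns, and all rows are returned in a dict keyed by their
'media:id' value, so the upload loop can join location.csv and metadata.csv
rows by id:

    contents = ("media:id,media:original\n"
                "1,images/turtle.jpg\n"
                "2,http://example.org/pics/ocean.jpg\n")
    print parse_csv_file(contents)
    # {'1': {'media:id': '1', 'media:original': 'images/turtle.jpg'},
    #  '2': {'media:id': '2', 'media:original': 'http://example.org/pics/ocean.jpg'}}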