Merge branch 'bulk-upload' into metadata
commit a372c30531
@@ -53,6 +53,10 @@ SUBCOMMAND_MAP = {
         'setup': 'mediagoblin.gmg_commands.addmedia:parser_setup',
         'func': 'mediagoblin.gmg_commands.addmedia:addmedia',
         'help': 'Reprocess media entries'},
+    'batchaddmedia': {
+        'setup': 'mediagoblin.gmg_commands.batchaddmedia:parser_setup',
+        'func': 'mediagoblin.gmg_commands.batchaddmedia:batchaddmedia',
+        'help': 'Add many media entries at once'}
     # 'theme': {
     #     'setup': 'mediagoblin.gmg_commands.theme:theme_parser_setup',
     #     'func': 'mediagoblin.gmg_commands.theme:theme',
mediagoblin/gmg_commands/batchaddmedia.py (new file, 217 lines)
@@ -0,0 +1,217 @@
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import json, tempfile, urllib, tarfile, zipfile, subprocess
from csv import reader as csv_reader
from urlparse import urlparse
from pyld import jsonld

from mediagoblin.gmg_commands import util as commands_util
from mediagoblin.submit.lib import (
    submit_media, get_upload_file_limits,
    FileUploadLimit, UserUploadLimit, UserPastUploadLimit)

from mediagoblin import mg_globals

def parser_setup(subparser):
    subparser.description = """\
This command allows the administrator to upload many media files at once."""
    subparser.add_argument(
        'username',
        help="Name of user these media entries belong to")
    subparser.add_argument(
        'target_path',
        help=("""\
Path to a local archive or directory containing a "location.csv" and a
"metadata.csv" file. These are csv (comma separated value) files with the
locations and metadata of the files to be uploaded. The location must be listed
with either the URL of the remote media file or the filesystem path of a local
file. The metadata should be provided with one column for each of the 15 Dublin
Core properties (http://dublincore.org/documents/dces/). Both "location.csv" and
"metadata.csv" must begin with a row demonstrating the order of the columns. We
have provided an example of these files at <url to be added>
"""))
    subparser.add_argument(
        "-l", "--license",
        help=(
            "License these media entries will be released under, if all the same. "
            "Should be a URL."))
    subparser.add_argument(
        '--celery',
        action='store_true',
        help="Don't process eagerly, pass off to celery")

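
# For illustration, the two csv files described in the help text above might
# look something like this (the media ids, locations and metadata values here
# are hypothetical, not part of any shipped example):
#
#   location.csv:
#       media:id,media:original
#       1,http://example.org/cat.jpg
#       2,images/dog.png
#
#   metadata.csv:
#       media:id,dcterms:title,dcterms:description,dcterms:license
#       1,A cat,A photo of a cat,http://creativecommons.org/licenses/by-sa/3.0/
#       2,A dog,A drawing of a dog,
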
def batchaddmedia(args):
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.User.query.filter_by(username=args.username.lower()).first()
    if user is None:
        print "Sorry, no user by username '%s' exists" % args.username
        return

    upload_limit, max_file_size = get_upload_file_limits(user)
    temp_files = []

    if os.path.isdir(args.target_path):
        dir_path = args.target_path

    elif tarfile.is_tarfile(args.target_path):
        dir_path = tempfile.mkdtemp()
        temp_files.append(dir_path)
        tar = tarfile.open(args.target_path)
        tar.extractall(path=dir_path)

    elif zipfile.is_zipfile(args.target_path):
        dir_path = tempfile.mkdtemp()
        temp_files.append(dir_path)
        zipped_file = zipfile.ZipFile(args.target_path)
        zipped_file.extractall(path=dir_path)

    else:
        print "Couldn't recognize the file. This script only accepts tar files, \
zip files and directories"
        # Bail out here; dir_path would be undefined past this point
        return

    if dir_path.endswith('/'):
        dir_path = dir_path[:-1]

    location_file_path = "{dir_path}/location.csv".format(
        dir_path=dir_path)
    metadata_file_path = "{dir_path}/metadata.csv".format(
        dir_path=dir_path)

    # check that the location file exists
    abs_location_filename = os.path.abspath(location_file_path)
    if not os.path.exists(abs_location_filename):
        print "Can't find a file with filename '%s'" % location_file_path
        return

    # check that the metadata file exists
    abs_metadata_filename = os.path.abspath(metadata_file_path)
    if not os.path.exists(abs_metadata_filename):
        print "Can't find a file with filename '%s'" % metadata_file_path
        return

    def maybe_unicodeify(some_string):
        # this is kinda terrible
        if some_string is None:
            return None
        else:
            return unicode(some_string)

    with file(abs_location_filename, 'r') as all_locations:
        contents = all_locations.read()
        media_locations = parse_csv_file(contents)

    with file(abs_metadata_filename, 'r') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    dcterms_context = {'dcterms': 'http://purl.org/dc/terms/'}

    for media_id in media_locations.keys():
        file_metadata = media_metadata[media_id]
        # Compact the metadata against the Dublin Core terms context
        json_ld_metadata = jsonld.compact(file_metadata, dcterms_context)
        original_location = media_locations[media_id]['media:original']
        url = urlparse(original_location)

        title = file_metadata.get('dcterms:title')
        description = file_metadata.get('dcterms:description')
        license = file_metadata.get('dcterms:license')
        filename = url.path.split('/')[-1]
        files_attempted += 1

        if url.scheme in ('http', 'https'):
            # Download the remote file into a local temporary file
            media_file = tempfile.TemporaryFile()
            res = urllib.urlopen(url.geturl())
            media_file.write(res.read())
            media_file.seek(0)

        elif url.scheme == '':
            # Local file; relative paths are resolved against the archive root
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = "{dir_path}/{local_path}".format(
                    dir_path=dir_path,
                    local_path=path)
                file_abs_path = os.path.abspath(file_path)
            try:
                media_file = file(file_abs_path, 'rb')
            except IOError:
                print "\
FAIL: Local file {filename} could not be accessed.".format(filename=filename)
                print "Skipping it."
                continue

        else:
            # Any other scheme would leave media_file undefined, so skip it
            print "FAIL: Couldn't understand the location '{location}'.".format(
                location=original_location)
            print "Skipping it."
            continue

        try:
            submit_media(
                mg_app=app,
                user=user,
                submitted_file=media_file,
                filename=filename,
                title=maybe_unicodeify(title),
                description=maybe_unicodeify(description),
                license=maybe_unicodeify(license),
                tags_string=u"",
                upload_limit=upload_limit, max_file_size=max_file_size)
            print "Successfully uploaded {filename}!".format(filename=filename)
            print ""
            files_uploaded += 1
        except FileUploadLimit:
            print "FAIL: This file is larger than the upload limits for this site."
        except UserUploadLimit:
            print "FAIL: This file will put this user past their upload limits."
        except UserPastUploadLimit:
            print "FAIL: This user is already past their upload limits."

    print "\
{files_uploaded} out of {files_attempted} files successfully uploaded".format(
        files_uploaded=files_uploaded,
        files_attempted=files_attempted)
    teardown(temp_files)


def parse_csv_file(file_contents):
    """
    Map each row of a csv file to a dict, keyed by the row's 'media:id' column.
    """
    list_of_contents = file_contents.split('\n')
    # Parse the header row with the csv reader too, in case fields are quoted
    key, lines = (csv_reader([list_of_contents[0]]).next(),
                  list_of_contents[1:])
    objects_dict = {}

    # Build a dictionary mapping column headers to each line's values
    for line in lines:
        if line.isspace() or line == '': continue
        values = csv_reader([line]).next()
        line_dict = dict([(key[i], val)
            for i, val in enumerate(values)])
        media_id = line_dict['media:id']
        objects_dict[media_id] = line_dict

    return objects_dict

def teardown(temp_files):
    # Remove any temporary directories created while unpacking archives
    for temp_file in temp_files:
        subprocess.call(['rm', '-r', temp_file])
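
Assuming a standard MediaGoblin install where the gmg command is available, a run of the new subcommand might look like this (the username and archive name here are hypothetical):

    gmg batchaddmedia admin /path/to/batch_upload.zip

where the archive (or directory) contains location.csv, metadata.csv, and any local media files the location column points to.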