From 7a29c67bf92830427e30590fe5a7b720da7520d4 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 6 Feb 2014 15:15:57 -0500 Subject: [PATCH 01/18] In this commit, I added a new column which will be used for RDFa metadata of media. --- mediagoblin/db/migrations.py | 12 ++++++++++++ mediagoblin/db/models.py | 1 + 2 files changed, 13 insertions(+) diff --git a/mediagoblin/db/migrations.py b/mediagoblin/db/migrations.py index 426080a2..a7400bf0 100644 --- a/mediagoblin/db/migrations.py +++ b/mediagoblin/db/migrations.py @@ -31,6 +31,7 @@ from mediagoblin.db.migration_tools import ( RegisterMigration, inspect_table, replace_table_hack) from mediagoblin.db.models import (MediaEntry, Collection, MediaComment, User, Privilege) +from mediagoblin.db.extratypes import JSONEncoded, MutationDict MIGRATIONS = {} @@ -720,3 +721,14 @@ def drop_MediaEntry_collected(db): media_collected.drop() db.commit() + +@RegisterMigration(20, MIGRATIONS) +def add_work_metadata_column(db): + metadata = MetaData(bind=db.bind) + + media_file = inspect_table(metadata, 'core__mediafiles') + + col = Column('work_metadata', MutationDict.as_mutable(JSONEncoded)) + col.create(media_file) + + db.commit() diff --git a/mediagoblin/db/models.py b/mediagoblin/db/models.py index b750375d..ac69d040 100644 --- a/mediagoblin/db/models.py +++ b/mediagoblin/db/models.py @@ -420,6 +420,7 @@ class MediaFile(Base): name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False) file_path = Column(PathTupleWithSlashes) file_metadata = Column(MutationDict.as_mutable(JSONEncoded)) + work_metadata = Column(MutationDict.as_mutable(JSONEncoded)) __table_args__ = ( PrimaryKeyConstraint('media_entry', 'name_id'), From 74d7ff96142c2da375e12df91e23fb50c2b2af88 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Tue, 18 Mar 2014 16:49:48 -0400 Subject: [PATCH 02/18] Big update. I added in a json-ld context file which will be used in all our metadata columns in the future. The context describes the dublin core elements. 
It still has not been finalized however. --- mediagoblin/routing.py | 4 +- .../mediagoblin/metadata_contexts/v1 | 70 +++++++++++++++++++ mediagoblin/views.py | 6 ++ setup.py | 1 + 4 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 mediagoblin/templates/mediagoblin/metadata_contexts/v1 diff --git a/mediagoblin/routing.py b/mediagoblin/routing.py index 1393f01c..a6b2a543 100644 --- a/mediagoblin/routing.py +++ b/mediagoblin/routing.py @@ -28,7 +28,9 @@ _log = logging.getLogger(__name__) def get_url_map(): add_route('index', '/', 'mediagoblin.views:root_view') add_route('terms_of_service','/terms_of_service', - 'mediagoblin.views:terms_of_service') + 'mediagoblin.views:terms_of_service'), + add_route('metadata_context','/metadata_context/v/', + 'mediagoblin.views:metadata_context_view'), mount('/auth', auth_routes) mount('/mod', moderation_routes) diff --git a/mediagoblin/templates/mediagoblin/metadata_contexts/v1 b/mediagoblin/templates/mediagoblin/metadata_contexts/v1 new file mode 100644 index 00000000..1325d920 --- /dev/null +++ b/mediagoblin/templates/mediagoblin/metadata_contexts/v1 @@ -0,0 +1,70 @@ +{ + "@context": { + "dc": "http://purl.org/dc/elements/1.1/", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "contributor":{ + "@id":"dc:title", + "@type":"xsd:string" + }, + "coverage":{ + "@id":"dc:coverage", + "@type":"xsd:string" + }, + "created":{ + "@id":"dc:created", + "@type":"xsd:date" + }, + "creator":{ + "@id":"dc:created", + "@type":"xsd:date" + }, + "date":{ + "@id":"dc:date", + "@type":"xsd:date" + }, + "description":{ + "@id":"dc:description", + "@type":"xsd:string" + }, + "format":{ + "@id":"dc:format", + "@type":"xsd:string" + }, + "identifier":{ + "@id":"dc:identifier", + "@type":"xsd:string" + }, + "language":{ + "@id":"dc:language", + "@type":"xsd:string" + }, + "publisher":{ + "@id":"dc:publisher", + "@type":"xsd:string" + }, + "relation":{ + "@id":"dc:relation", + "@type":"xsd:string" + }, + "rights":{ + "@id":"dc:rights", + 
"@type":"xsd:anyURI" + }, + "source":{ + "@id":"dc:source", + "@type":"xsd:string" + }, + "subject":{ + "@id":"dc:subject", + "@type":"xsd:string" + }, + "title": { + "@id":"dc:title", + "@type":"xsd:string" + }, + "type":{ + "@id":"dc:type", + "@type":"xsd:string" + } + } +} diff --git a/mediagoblin/views.py b/mediagoblin/views.py index 009e48e4..1ed71473 100644 --- a/mediagoblin/views.py +++ b/mediagoblin/views.py @@ -62,3 +62,9 @@ def terms_of_service(request): return render_to_response(request, 'mediagoblin/terms_of_service.html', {}) + +def metadata_context_view(request): + version = request.matchdict['version_number'] + return render_to_response(request, + 'mediagoblin/metadata_contexts/v{version}'.format( + version=version), {}) diff --git a/setup.py b/setup.py index d3f91686..93873d73 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,7 @@ try: 'six>=1.4.1', 'oauthlib==0.5.0', 'unidecode', + 'jsonschema', ## Annoying. Please remove once we can! We only indirectly ## use pbr, and currently it breaks things, presumably till From 3214aeb2387cd1356685372f9abaebe35ea7f006 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 6 Feb 2014 15:17:06 -0500 Subject: [PATCH 03/18] This branch will create a commandline bulk-upload script. So far, I have written the code to read csv files into a usable dictionary. 
--- mediagoblin/gmg_commands/__init__.py | 4 + mediagoblin/gmg_commands/batchaddmedia.py | 110 ++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 mediagoblin/gmg_commands/batchaddmedia.py diff --git a/mediagoblin/gmg_commands/__init__.py b/mediagoblin/gmg_commands/__init__.py index a1eb599d..1460733f 100644 --- a/mediagoblin/gmg_commands/__init__.py +++ b/mediagoblin/gmg_commands/__init__.py @@ -53,6 +53,10 @@ SUBCOMMAND_MAP = { 'setup': 'mediagoblin.gmg_commands.addmedia:parser_setup', 'func': 'mediagoblin.gmg_commands.addmedia:addmedia', 'help': 'Reprocess media entries'}, + 'batchaddmedia': { + 'setup': 'mediagoblin.gmg_commands.batchaddmedia:parser_setup', + 'func': 'mediagoblin.gmg_commands.batchaddmedia:batchaddmedia', + 'help': 'Reprocess many media entries'} # 'theme': { # 'setup': 'mediagoblin.gmg_commands.theme:theme_parser_setup', # 'func': 'mediagoblin.gmg_commands.theme:theme', diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py new file mode 100644 index 00000000..1c0f6784 --- /dev/null +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -0,0 +1,110 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +import os + +from mediagoblin.gmg_commands import util as commands_util +from mediagoblin.submit.lib import ( + submit_media, get_upload_file_limits, + FileUploadLimit, UserUploadLimit, UserPastUploadLimit) + +from mediagoblin import mg_globals +import json, csv + +def parser_setup(subparser): + subparser.add_argument( + 'username', + help="Name of user this media entry belongs to") + subparser.add_argument( + 'locationfile', + help=( +"Local file on filesystem with the address of all the files to be uploaded")) + subparser.add_argument( + 'metadatafile', + help=( +"Local file on filesystem with metadata of all the files to be uploaded")) + subparser.add_argument( + "-l", "--license", + help=( + "License these media entry will be released under, if all the same" + "Should be a URL.")) + subparser.add_argument( + '--celery', + action='store_true', + help="Don't process eagerly, pass off to celery") + + +def batchaddmedia(args): + # Run eagerly unless explicetly set not to + if not args.celery: + os.environ['CELERY_ALWAYS_EAGER'] = 'true' + + app = commands_util.setup_app(args) + + # get the user + user = app.db.User.query.filter_by(username=args.username.lower()).first() + if user is None: + print "Sorry, no user by username '%s'" % args.username + return + + # check for the location file, if it exists... + location_filename = os.path.split(args.locationfile)[-1] + abs_location_filename = os.path.abspath(args.locationfile) + if not os.path.exists(abs_location_filename): + print "Can't find a file with filename '%s'" % args.locationfile + return + + # check for the location file, if it exists... 
+ metadata_filename = os.path.split(args.metadatafile)[-1] + abs_metadata_filename = os.path.abspath(args.metadatafile) + if not os.path.exists(abs_metadata_filename): + print "Can't find a file with filename '%s'" % args.metadatafile + return + + upload_limit, max_file_size = get_upload_file_limits(user) + + def maybe_unicodeify(some_string): + # this is kinda terrible + if some_string is None: + return None + else: + return unicode(some_string) + + with file(abs_location_filename, 'r') as all_locations: + contents = all_locations.read() + media_locations = parse_csv_file(contents) + + with file(abs_metadata_filename, 'r') as all_metadata: + contents = all_metadata.read() + media_metadata = parse_csv_file(contents) + +def parse_csv_file(file_contents): + list_of_contents = file_contents.split('\n') + key, lines = (list_of_contents[0].split(','), + list_of_contents[1:]) + list_of_objects = [] + + # Build a dictionary + for line in lines: + if line.isspace() or line == '': continue + values = csv.reader([line]).next() + new_dict = dict([(key[i], val) + for i, val in enumerate(values)]) + list_of_objects.append(new_dict) + + return list_of_objects + + From 714c4cb7d7a1918d3b4cf5cbe9145078cd330b5b Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Wed, 12 Feb 2014 14:37:00 -0500 Subject: [PATCH 04/18] The script now officially works! It works in many different situations, whether the media to be uploaded is stored locally or on the web. Still have to clean up the code and look for errors. I may also refactor some of this into a function to be used with a GUI frontend in another project. Lastly, I need to merge this with the metadata branch I've been working on, and convert the metadata.csv information into the proper format for the new metadata column. 
--- mediagoblin/gmg_commands/batchaddmedia.py | 130 ++++++++++++++++++---- 1 file changed, 111 insertions(+), 19 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 1c0f6784..7d7a2d4f 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -15,6 +15,10 @@ # along with this program. If not, see . import os +import json, tempfile, urllib, tarfile, subprocess +from csv import reader as csv_reader +from urlparse import urlparse +from pyld import jsonld from mediagoblin.gmg_commands import util as commands_util from mediagoblin.submit.lib import ( @@ -22,20 +26,26 @@ from mediagoblin.submit.lib import ( FileUploadLimit, UserUploadLimit, UserPastUploadLimit) from mediagoblin import mg_globals -import json, csv def parser_setup(subparser): subparser.add_argument( 'username', help="Name of user this media entry belongs to") - subparser.add_argument( - 'locationfile', + target_type = subparser.add_mutually_exclusive_group() + target_type.add_argument('-d', + '--directory', action='store_const', + const='directory', dest='target_type', + default='directory', help=( +"Target is a directory")) + target_type.add_argument('-a', + '--archive', action='store_const', + const='archive', dest='target_type', help=( -"Local file on filesystem with the address of all the files to be uploaded")) +"Target is an archive.")) subparser.add_argument( - 'metadatafile', + 'target_path', help=( -"Local file on filesystem with metadata of all the files to be uploaded")) +"Path to a local archive or directory containing a location.csv and metadata.csv file")) subparser.add_argument( "-l", "--license", help=( @@ -59,19 +69,36 @@ def batchaddmedia(args): if user is None: print "Sorry, no user by username '%s'" % args.username return + + upload_limit, max_file_size = get_upload_file_limits(user) + temp_files = [] + + if args.target_type == 'archive': + dir_path = tempfile.mkdtemp() + 
temp_files.append(dir_path) + tar = tarfile.open(args.target_path) + tar.extractall(path=dir_path) + + elif args.target_type == 'directory': + dir_path = args.target_path + + location_file_path = "{dir_path}/location.csv".format( + dir_path=dir_path) + metadata_file_path = "{dir_path}/metadata.csv".format( + dir_path=dir_path) # check for the location file, if it exists... - location_filename = os.path.split(args.locationfile)[-1] - abs_location_filename = os.path.abspath(args.locationfile) + location_filename = os.path.split(location_file_path)[-1] + abs_location_filename = os.path.abspath(location_file_path) if not os.path.exists(abs_location_filename): - print "Can't find a file with filename '%s'" % args.locationfile + print "Can't find a file with filename '%s'" % location_file_path return - # check for the location file, if it exists... - metadata_filename = os.path.split(args.metadatafile)[-1] - abs_metadata_filename = os.path.abspath(args.metadatafile) + # check for the metadata file, if it exists... 
+ metadata_filename = os.path.split(metadata_file_path)[-1] + abs_metadata_filename = os.path.abspath(metadata_file_path) if not os.path.exists(abs_metadata_filename): - print "Can't find a file with filename '%s'" % args.metadatafile + print "Can't find a file with filename '%s'" % metadata_file_path return upload_limit, max_file_size = get_upload_file_limits(user) @@ -91,20 +118,85 @@ def batchaddmedia(args): contents = all_metadata.read() media_metadata = parse_csv_file(contents) + dcterms_context = { 'dcterms':'http://purl.org/dc/terms/' } + + for media_id in media_locations.keys(): + file_metadata = media_metadata[media_id] + json_ld_metadata = jsonld.compact(file_metadata, dcterms_context) + original_location = media_locations[media_id]['media:original'] + url = urlparse(original_location) + + title = file_metadata.get('dcterms:title') + description = file_metadata.get('dcterms:description') + license = file_metadata.get('dcterms:license') + filename = url.path.split()[-1] + print "Working with {filename}".format(filename=filename) + + if url.scheme == 'http': + print "Downloading {filename}...".format( + filename=filename) + media_file = tempfile.TemporaryFile() + res = urllib.urlopen(url.geturl()) + media_file.write(res.read()) + media_file.seek(0) + + elif url.scheme == '': + path = url.path + if os.path.isabs(path): + file_abs_path = os.path.abspath(path) + else: + file_path = "{dir_path}/{local_path}".format( + dir_path=dir_path, + local_path=path) + file_abs_path = os.path.abspath(file_path) + try: + media_file = file(file_abs_path, 'r') + except IOError: + print "Local file {filename} could not be accessed.".format( + filename=filename) + print "Skipping it." 
+ continue + print "Submitting {filename}...".format(filename=filename) + try: + submit_media( + mg_app=app, + user=user, + submitted_file=media_file, + filename=filename, + title=maybe_unicodeify(title), + description=maybe_unicodeify(description), + license=maybe_unicodeify(license), + tags_string=u"", + upload_limit=upload_limit, max_file_size=max_file_size) + print "Successfully uploading {filename}!".format(filename=filename) + print "" + except FileUploadLimit: + print "This file is larger than the upload limits for this site." + except UserUploadLimit: + print "This file will put this user past their upload limits." + except UserPastUploadLimit: + print "This user is already past their upload limits." + teardown(temp_files) + + + def parse_csv_file(file_contents): list_of_contents = file_contents.split('\n') key, lines = (list_of_contents[0].split(','), list_of_contents[1:]) - list_of_objects = [] + objects_dict = {} # Build a dictionary for line in lines: if line.isspace() or line == '': continue - values = csv.reader([line]).next() - new_dict = dict([(key[i], val) + values = csv_reader([line]).next() + line_dict = dict([(key[i], val) for i, val in enumerate(values)]) - list_of_objects.append(new_dict) + media_id = line_dict['media:id'] + objects_dict[media_id] = (line_dict) - return list_of_objects + return objects_dict - +def teardown(temp_files): + for temp_file in temp_files: + subprocess.call(['rm','-r',temp_file]) From 27b7d94896cd3cede2050b62af1321ad69cd3fa1 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 13 Feb 2014 13:57:10 -0500 Subject: [PATCH 05/18] Minor change in the wording of argparsing. 
--- mediagoblin/gmg_commands/batchaddmedia.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 7d7a2d4f..2fd36dfb 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -36,12 +36,12 @@ def parser_setup(subparser): '--directory', action='store_const', const='directory', dest='target_type', default='directory', help=( -"Target is a directory")) +"Choose this option is the target is a directory.")) target_type.add_argument('-a', '--archive', action='store_const', const='archive', dest='target_type', help=( -"Target is an archive.")) +"Choose this option if the target is an archive.")) subparser.add_argument( 'target_path', help=( From 579a6b574f402c23d3b09d22e4ab4c9f71b0e7aa Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Wed, 19 Feb 2014 14:27:14 -0500 Subject: [PATCH 06/18] I made it so the command no longer requires the "Target type" to be provided; it now recognizes whether the target is a directory or an archive on its own. I added in a help message, which is still incomplete, but should make it easier for admins to know how to use this new command. I believe we should also provide an example of the location.csv and metadata.csv files, so there is no confusion. Also, I made it possible for the command to recognize zip files as a valid archive. I also made some minor changes to the command's description within the larger gmg command help menu. 
--- mediagoblin/gmg_commands/__init__.py | 2 +- mediagoblin/gmg_commands/batchaddmedia.py | 55 ++++++++++++++--------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/mediagoblin/gmg_commands/__init__.py b/mediagoblin/gmg_commands/__init__.py index 1460733f..55e85116 100644 --- a/mediagoblin/gmg_commands/__init__.py +++ b/mediagoblin/gmg_commands/__init__.py @@ -56,7 +56,7 @@ SUBCOMMAND_MAP = { 'batchaddmedia': { 'setup': 'mediagoblin.gmg_commands.batchaddmedia:parser_setup', 'func': 'mediagoblin.gmg_commands.batchaddmedia:batchaddmedia', - 'help': 'Reprocess many media entries'} + 'help': 'Add many media entries at once'} # 'theme': { # 'setup': 'mediagoblin.gmg_commands.theme:theme_parser_setup', # 'func': 'mediagoblin.gmg_commands.theme:theme', diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 2fd36dfb..d3ab7733 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -15,7 +15,7 @@ # along with this program. If not, see . 
import os -import json, tempfile, urllib, tarfile, subprocess +import json, tempfile, urllib, tarfile, zipfile, subprocess from csv import reader as csv_reader from urlparse import urlparse from pyld import jsonld @@ -28,29 +28,28 @@ from mediagoblin.submit.lib import ( from mediagoblin import mg_globals def parser_setup(subparser): + subparser.description = """\ +This command allows the administrator to upload many media files at once.""" subparser.add_argument( 'username', - help="Name of user this media entry belongs to") - target_type = subparser.add_mutually_exclusive_group() - target_type.add_argument('-d', - '--directory', action='store_const', - const='directory', dest='target_type', - default='directory', help=( -"Choose this option is the target is a directory.")) - target_type.add_argument('-a', - '--archive', action='store_const', - const='archive', dest='target_type', - help=( -"Choose this option if the target is an archive.")) + help="Name of user these media entries belong to") subparser.add_argument( 'target_path', - help=( -"Path to a local archive or directory containing a location.csv and metadata.csv file")) + help=("""\ +Path to a local archive or directory containing a "location.csv" and a +"metadata.csv" file. These are csv (comma seperated value) files with the +locations and metadata of the files to be uploaded. The location must be listed +with either the URL of the remote media file or the filesystem path of a local +file. The metadata should be provided with one column for each of the 15 Dublin +Core properties (http://dublincore.org/documents/dces/). Both "location.csv" and +"metadata.csv" must begin with a row demonstrating the order of the columns. We +have provided an example of these files at +""")) subparser.add_argument( "-l", "--license", help=( - "License these media entry will be released under, if all the same" - "Should be a URL.")) + "License these media entry will be released under, if all the same. 
" + "Should be a URL.")) subparser.add_argument( '--celery', action='store_true', @@ -67,26 +66,38 @@ def batchaddmedia(args): # get the user user = app.db.User.query.filter_by(username=args.username.lower()).first() if user is None: - print "Sorry, no user by username '%s'" % args.username + print "Sorry, no user by username '%s' exists" % args.username return upload_limit, max_file_size = get_upload_file_limits(user) temp_files = [] - if args.target_type == 'archive': + if tarfile.is_tarfile(args.target_path): dir_path = tempfile.mkdtemp() temp_files.append(dir_path) tar = tarfile.open(args.target_path) tar.extractall(path=dir_path) - elif args.target_type == 'directory': + elif zipfile.is_zipfile(args.target_path): + dir_path = tempfile.mkdtemp() + temp_files.append(dir_path) + zipped_file = zipfile.ZipFile(args.target_path) + zipped_file.extractall(path=dir_path) + + elif os.path.isdir(args.target_path): dir_path = args.target_path + else: + print "Couldn't recognize the file. This script only accepts tar files,\ +zip files and directories" + if dir_path.endswith('/'): + dir_path = dir_path[:-1] + location_file_path = "{dir_path}/location.csv".format( dir_path=dir_path) metadata_file_path = "{dir_path}/metadata.csv".format( dir_path=dir_path) - + # check for the location file, if it exists... location_filename = os.path.split(location_file_path)[-1] abs_location_filename = os.path.abspath(location_file_path) @@ -178,7 +189,7 @@ def batchaddmedia(args): print "This user is already past their upload limits." teardown(temp_files) - + def parse_csv_file(file_contents): list_of_contents = file_contents.split('\n') From 9d4e9de76b29f8cc602a0db6334e7d36bb3e0fb0 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Fri, 21 Feb 2014 12:38:02 -0500 Subject: [PATCH 07/18] Changed some of the print messages as well as tweaked the order of the commands attempts to figure out what type of file the target file is. 
--- mediagoblin/gmg_commands/batchaddmedia.py | 30 +++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index d3ab7733..678c8ab4 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -63,6 +63,8 @@ def batchaddmedia(args): app = commands_util.setup_app(args) + files_uploaded, files_attempted = 0, 0 + # get the user user = app.db.User.query.filter_by(username=args.username.lower()).first() if user is None: @@ -72,7 +74,10 @@ def batchaddmedia(args): upload_limit, max_file_size = get_upload_file_limits(user) temp_files = [] - if tarfile.is_tarfile(args.target_path): + if os.path.isdir(args.target_path): + dir_path = args.target_path + + elif tarfile.is_tarfile(args.target_path): dir_path = tempfile.mkdtemp() temp_files.append(dir_path) tar = tarfile.open(args.target_path) @@ -84,9 +89,6 @@ def batchaddmedia(args): zipped_file = zipfile.ZipFile(args.target_path) zipped_file.extractall(path=dir_path) - elif os.path.isdir(args.target_path): - dir_path = args.target_path - else: print "Couldn't recognize the file. 
This script only accepts tar files,\ zip files and directories" @@ -141,11 +143,9 @@ zip files and directories" description = file_metadata.get('dcterms:description') license = file_metadata.get('dcterms:license') filename = url.path.split()[-1] - print "Working with {filename}".format(filename=filename) + files_attempted += 1 if url.scheme == 'http': - print "Downloading {filename}...".format( - filename=filename) media_file = tempfile.TemporaryFile() res = urllib.urlopen(url.geturl()) media_file.write(res.read()) @@ -163,11 +163,10 @@ zip files and directories" try: media_file = file(file_abs_path, 'r') except IOError: - print "Local file {filename} could not be accessed.".format( - filename=filename) + print "\ +FAIL: Local file {filename} could not be accessed.".format(filename=filename) print "Skipping it." continue - print "Submitting {filename}...".format(filename=filename) try: submit_media( mg_app=app, @@ -181,12 +180,17 @@ zip files and directories" upload_limit=upload_limit, max_file_size=max_file_size) print "Successfully uploading {filename}!".format(filename=filename) print "" + files_uploaded += 1 except FileUploadLimit: - print "This file is larger than the upload limits for this site." + print "FAIL: This file is larger than the upload limits for this site." except UserUploadLimit: - print "This file will put this user past their upload limits." + print "FAIL: This file will put this user past their upload limits." except UserPastUploadLimit: - print "This user is already past their upload limits." + print "FAIL: This user is already past their upload limits." 
+ print "\ +{files_uploaded} out of {files_attempted} files successfully uploaded".format( + files_uploaded=files_uploaded, + files_attempted=files_attempted) teardown(temp_files) From 6b43a6f432b57c0f54427d65de361adc63388799 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 27 Mar 2014 13:31:04 -0400 Subject: [PATCH 08/18] Began work on metadata validation --- mediagoblin/gmg_commands/batchaddmedia.py | 33 +++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 678c8ab4..83aea7b7 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -26,6 +26,7 @@ from mediagoblin.submit.lib import ( FileUploadLimit, UserUploadLimit, UserPastUploadLimit) from mediagoblin import mg_globals +from jsonschema import validate def parser_setup(subparser): subparser.description = """\ @@ -215,3 +216,35 @@ def parse_csv_file(file_contents): def teardown(temp_files): for temp_file in temp_files: subprocess.call(['rm','-r',temp_file]) + +def check_metadata_format(metadata_dict): + schema = json.loads(""" +{ + "$schema":"http://json-schema.org/schema#", + "properties":{ + "@context":{}, + "contributor":{}, + "coverage":{}, + "created":{}, + "creator":{}, + "date":{}, + "description":{}, + "format":{}, + "identifier":{}, + "language":{}, + "publisher":{}, + "relation":{}, + "rights" : { + "format":"uri", + "type":"string" + }, + "source":{}, + "subject":{}, + "title":{}, + "type":{} + }, + "additionalProperties": false, + "required":["title","@context"] +}""") + try: + validate(metadata_dict, schema) From 680faaaa855a5fa60178d6b2a7e562619d3a4c4b Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 27 Mar 2014 13:55:15 -0400 Subject: [PATCH 09/18] Added exception handling into the metadata format checking function. 
--- mediagoblin/gmg_commands/batchaddmedia.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 83aea7b7..f06bc2e8 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -24,9 +24,11 @@ from mediagoblin.gmg_commands import util as commands_util from mediagoblin.submit.lib import ( submit_media, get_upload_file_limits, FileUploadLimit, UserUploadLimit, UserPastUploadLimit) +from mediagoblin.tools.translate import lazy_pass_to_ugettext as _ from mediagoblin import mg_globals -from jsonschema import validate +from jsonschema import validate +from jsonschema.exceptions import ValidationError def parser_setup(subparser): subparser.description = """\ @@ -135,7 +137,10 @@ zip files and directories" dcterms_context = { 'dcterms':'http://purl.org/dc/terms/' } for media_id in media_locations.keys(): - file_metadata = media_metadata[media_id] + file_metadata = media_metadata[media_id] + santized_metadata = check_metadata_format(file_metadata) + if sanitized_metadata == {}: continue + json_ld_metadata = jsonld.compact(file_metadata, dcterms_context) original_location = media_locations[media_id]['media:original'] url = urlparse(original_location) @@ -248,3 +253,14 @@ def check_metadata_format(metadata_dict): }""") try: validate(metadata_dict, schema) + output_dict = metadata_dict + except ValidationError, exc: + title = metadata_dict.get('title') or metadata_dict.get('media:id') or \ + _(u'UNKNOWN FILE') + print _( +u"""WARN: Could not find appropriate metadata for file {title}. File will be +skipped""".format(title=title)) + output_dict = {} + except: + raise + return output_dict From 8f054a6b99a594da36a859f7bb5f11464c1602bd Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 27 Mar 2014 14:11:12 -0400 Subject: [PATCH 10/18] Fixed up some fatal errors. Is still not ready. 
--- mediagoblin/gmg_commands/batchaddmedia.py | 46 ++++++++++++----------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index f06bc2e8..414e969c 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -138,7 +138,7 @@ zip files and directories" for media_id in media_locations.keys(): file_metadata = media_metadata[media_id] - santized_metadata = check_metadata_format(file_metadata) + sanitized_metadata = check_metadata_format(file_metadata) if sanitized_metadata == {}: continue json_ld_metadata = jsonld.compact(file_metadata, dcterms_context) @@ -207,7 +207,7 @@ def parse_csv_file(file_contents): list_of_contents[1:]) objects_dict = {} - # Build a dictionary + # Build a dictionaryfrom mediagoblin.tools.translate import lazy_pass_to_ugettext as _ for line in lines: if line.isspace() or line == '': continue values = csv_reader([line]).next() @@ -228,38 +228,40 @@ def check_metadata_format(metadata_dict): "$schema":"http://json-schema.org/schema#", "properties":{ "@context":{}, - "contributor":{}, - "coverage":{}, - "created":{}, - "creator":{}, - "date":{}, - "description":{}, - "format":{}, - "identifier":{}, - "language":{}, - "publisher":{}, - "relation":{}, - "rights" : { + "dcterms:contributor":{}, + "dcterms:coverage":{}, + "dcterms:created":{}, + "dcterms:creator":{}, + "dcterms:date":{}, + "dcterms:description":{}, + "dcterms:format":{}, + "dcterms:identifier":{}, + "dcterms:language":{}, + "dcterms:publisher":{}, + "dcterms:relation":{}, + "dcterms:rights" : { "format":"uri", "type":"string" }, - "source":{}, - "subject":{}, - "title":{}, - "type":{} + "dcterms:source":{}, + "dcterms:subject":{}, + "dcterms:title":{}, + "dcterms:type":{}, + "media:id":{} }, "additionalProperties": false, - "required":["title","@context"] + "required":["dcterms:title","@context","media:id"] }""") + metadata_dict["@context"] 
= u"http://127.0.0.1:6543/metadata_context/v1/" try: validate(metadata_dict, schema) output_dict = metadata_dict except ValidationError, exc: - title = metadata_dict.get('title') or metadata_dict.get('media:id') or \ + title = metadata_dict.get('dcterms:title') or metadata_dict.get('media:id') or \ _(u'UNKNOWN FILE') print _( -u"""WARN: Could not find appropriate metadata for file {title}. File will be -skipped""".format(title=title)) +u"""WARN: Could not find appropriate metadata for file {title}. +File will be skipped""".format(title=title)) output_dict = {} except: raise From 32aec1e533e9de8b843e54d5a08b55d26e81f87e Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 27 Mar 2014 17:10:31 -0400 Subject: [PATCH 11/18] Fixed a minor error in the batch upload script and modified the json-ld context. --- mediagoblin/gmg_commands/batchaddmedia.py | 9 ++--- .../mediagoblin/metadata_contexts/v1 | 34 +++++++++---------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 414e969c..012a5ee4 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -137,6 +137,8 @@ zip files and directories" dcterms_context = { 'dcterms':'http://purl.org/dc/terms/' } for media_id in media_locations.keys(): + files_attempted += 1 + file_metadata = media_metadata[media_id] sanitized_metadata = check_metadata_format(file_metadata) if sanitized_metadata == {}: continue @@ -149,7 +151,6 @@ zip files and directories" description = file_metadata.get('dcterms:description') license = file_metadata.get('dcterms:license') filename = url.path.split()[-1] - files_attempted += 1 if url.scheme == 'http': media_file = tempfile.TemporaryFile() @@ -228,6 +229,7 @@ def check_metadata_format(metadata_dict): "$schema":"http://json-schema.org/schema#", "properties":{ "@context":{}, + "dcterms:contributor":{}, "dcterms:coverage":{}, "dcterms:created":{}, @@ -246,8 
+248,7 @@ def check_metadata_format(metadata_dict): "dcterms:source":{}, "dcterms:subject":{}, "dcterms:title":{}, - "dcterms:type":{}, - "media:id":{} + "dcterms:type":{} }, "additionalProperties": false, "required":["dcterms:title","@context","media:id"] @@ -260,7 +261,7 @@ def check_metadata_format(metadata_dict): title = metadata_dict.get('dcterms:title') or metadata_dict.get('media:id') or \ _(u'UNKNOWN FILE') print _( -u"""WARN: Could not find appropriate metadata for file {title}. +u"""WARN: Could not find appropriate metadata for file "{title}". File will be skipped""".format(title=title)) output_dict = {} except: diff --git a/mediagoblin/templates/mediagoblin/metadata_contexts/v1 b/mediagoblin/templates/mediagoblin/metadata_contexts/v1 index 1325d920..99882de2 100644 --- a/mediagoblin/templates/mediagoblin/metadata_contexts/v1 +++ b/mediagoblin/templates/mediagoblin/metadata_contexts/v1 @@ -1,69 +1,69 @@ { "@context": { - "dc": "http://purl.org/dc/elements/1.1/", + "dcterms": "http://purl.org/dc/elements/1.1/", "xsd": "http://www.w3.org/2001/XMLSchema#", "contributor":{ - "@id":"dc:title", + "@id":"dcterms:title", "@type":"xsd:string" }, "coverage":{ - "@id":"dc:coverage", + "@id":"dcterms:coverage", "@type":"xsd:string" }, "created":{ - "@id":"dc:created", + "@id":"dcterms:created", "@type":"xsd:date" }, "creator":{ - "@id":"dc:created", + "@id":"dcterms:created", "@type":"xsd:date" }, "date":{ - "@id":"dc:date", + "@id":"dcterms:date", "@type":"xsd:date" }, "description":{ - "@id":"dc:description", + "@id":"dcterms:description", "@type":"xsd:string" }, "format":{ - "@id":"dc:format", + "@id":"dcterms:format", "@type":"xsd:string" }, "identifier":{ - "@id":"dc:identifier", + "@id":"dcterms:identifier", "@type":"xsd:string" }, "language":{ - "@id":"dc:language", + "@id":"dcterms:language", "@type":"xsd:string" }, "publisher":{ - "@id":"dc:publisher", + "@id":"dcterms:publisher", "@type":"xsd:string" }, "relation":{ - "@id":"dc:relation", + 
"@id":"dcterms:relation", "@type":"xsd:string" }, "rights":{ - "@id":"dc:rights", + "@id":"dcterms:rights", "@type":"xsd:anyURI" }, "source":{ - "@id":"dc:source", + "@id":"dcterms:source", "@type":"xsd:string" }, "subject":{ - "@id":"dc:subject", + "@id":"dcterms:subject", "@type":"xsd:string" }, "title": { - "@id":"dc:title", + "@id":"dcterms:title", "@type":"xsd:string" }, "type":{ - "@id":"dc:type", + "@id":"dcterms:type", "@type":"xsd:string" } } From 0e4144abaf2dc6a18bc2750808e4561caf4e5e9c Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 27 Mar 2014 17:29:34 -0400 Subject: [PATCH 12/18] Wrote more comprehensive error messages. --- mediagoblin/gmg_commands/batchaddmedia.py | 28 +++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 012a5ee4..fe345d5f 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -229,7 +229,7 @@ def check_metadata_format(metadata_dict): "$schema":"http://json-schema.org/schema#", "properties":{ "@context":{}, - + "media:id":{}, "dcterms:contributor":{}, "dcterms:coverage":{}, "dcterms:created":{}, @@ -251,18 +251,32 @@ def check_metadata_format(metadata_dict): "dcterms:type":{} }, "additionalProperties": false, - "required":["dcterms:title","@context","media:id"] + "required":["dcterms:title","@context","media:id","bell"] }""") metadata_dict["@context"] = u"http://127.0.0.1:6543/metadata_context/v1/" try: validate(metadata_dict, schema) output_dict = metadata_dict except ValidationError, exc: - title = metadata_dict.get('dcterms:title') or metadata_dict.get('media:id') or \ - _(u'UNKNOWN FILE') - print _( -u"""WARN: Could not find appropriate metadata for file "{title}". 
-File will be skipped""".format(title=title)) + title = (metadata_dict.get('dcterms:title') or + metadata_dict.get('media:id') or _(u'UNKNOWN FILE')) + + if exc.validator == "additionalProperties": + message = _(u'Invalid metadata provided for file "{title}". This \ +script only accepts the Dublin Core metadata terms.'.format(title=title)) + + elif exc.validator == "required": + message = _( +u'All necessary metadata was not provided for file "{title}", you must include \ +a "dcterms:title" column for each media file'.format(title=title)) + + else: + message = _(u'Could not find appropriate metadata for file \ +"{title}".'.format(title=title)) + + print _(u"""WARN: {message} \nSkipping File...\n""".format( + message=message)) + output_dict = {} except: raise From 8e33666813b7ab5f46746a1c294c8e5baa6b08ef Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 3 Apr 2014 12:18:17 -0400 Subject: [PATCH 13/18] Fixed a bad get of 'dcterms:rights' and am throwing away the idea of an external context file for the json-ld because it feels unnecessary seeing as we are just using the dc core terms --- mediagoblin/gmg_commands/batchaddmedia.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index fe345d5f..68993aa2 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -15,7 +15,7 @@ # along with this program. If not, see . 
import os -import json, tempfile, urllib, tarfile, zipfile, subprocess +import tempfile, urllib, tarfile, zipfile, subprocess from csv import reader as csv_reader from urlparse import urlparse from pyld import jsonld @@ -149,7 +149,7 @@ zip files and directories" title = file_metadata.get('dcterms:title') description = file_metadata.get('dcterms:description') - license = file_metadata.get('dcterms:license') + license = file_metadata.get('dcterms:rights') filename = url.path.split()[-1] if url.scheme == 'http': @@ -201,7 +201,6 @@ FAIL: Local file {filename} could not be accessed.".format(filename=filename) teardown(temp_files) - def parse_csv_file(file_contents): list_of_contents = file_contents.split('\n') key, lines = (list_of_contents[0].split(','), @@ -219,16 +218,16 @@ def parse_csv_file(file_contents): return objects_dict + def teardown(temp_files): for temp_file in temp_files: subprocess.call(['rm','-r',temp_file]) + def check_metadata_format(metadata_dict): - schema = json.loads(""" -{ + schema = { "$schema":"http://json-schema.org/schema#", "properties":{ - "@context":{}, "media:id":{}, "dcterms:contributor":{}, "dcterms:coverage":{}, @@ -250,13 +249,14 @@ def check_metadata_format(metadata_dict): "dcterms:title":{}, "dcterms:type":{} }, - "additionalProperties": false, - "required":["dcterms:title","@context","media:id","bell"] -}""") - metadata_dict["@context"] = u"http://127.0.0.1:6543/metadata_context/v1/" + "additionalProperties": False, + "required":["dcterms:title","media:id"] +} try: validate(metadata_dict, schema) output_dict = metadata_dict + del output_dict['media:id'] + except ValidationError, exc: title = (metadata_dict.get('dcterms:title') or metadata_dict.get('media:id') or _(u'UNKNOWN FILE')) @@ -280,4 +280,5 @@ a "dcterms:title" column for each media file'.format(title=title)) output_dict = {} except: raise + return output_dict From fb60426ed1263de092ebc27afb96175d55ae7095 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Thu, 3 Apr 2014 
12:20:30 -0400 Subject: [PATCH 14/18] Took out all of the references to the temporary url I was using /metadata_context/v1 --- mediagoblin/routing.py | 2 - .../mediagoblin/metadata_contexts/v1 | 70 ------------------- mediagoblin/views.py | 6 -- 3 files changed, 78 deletions(-) delete mode 100644 mediagoblin/templates/mediagoblin/metadata_contexts/v1 diff --git a/mediagoblin/routing.py b/mediagoblin/routing.py index a6b2a543..9f2584d3 100644 --- a/mediagoblin/routing.py +++ b/mediagoblin/routing.py @@ -29,8 +29,6 @@ def get_url_map(): add_route('index', '/', 'mediagoblin.views:root_view') add_route('terms_of_service','/terms_of_service', 'mediagoblin.views:terms_of_service'), - add_route('metadata_context','/metadata_context/v/', - 'mediagoblin.views:metadata_context_view'), mount('/auth', auth_routes) mount('/mod', moderation_routes) diff --git a/mediagoblin/templates/mediagoblin/metadata_contexts/v1 b/mediagoblin/templates/mediagoblin/metadata_contexts/v1 deleted file mode 100644 index 99882de2..00000000 --- a/mediagoblin/templates/mediagoblin/metadata_contexts/v1 +++ /dev/null @@ -1,70 +0,0 @@ -{ - "@context": { - "dcterms": "http://purl.org/dc/elements/1.1/", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "contributor":{ - "@id":"dcterms:title", - "@type":"xsd:string" - }, - "coverage":{ - "@id":"dcterms:coverage", - "@type":"xsd:string" - }, - "created":{ - "@id":"dcterms:created", - "@type":"xsd:date" - }, - "creator":{ - "@id":"dcterms:created", - "@type":"xsd:date" - }, - "date":{ - "@id":"dcterms:date", - "@type":"xsd:date" - }, - "description":{ - "@id":"dcterms:description", - "@type":"xsd:string" - }, - "format":{ - "@id":"dcterms:format", - "@type":"xsd:string" - }, - "identifier":{ - "@id":"dcterms:identifier", - "@type":"xsd:string" - }, - "language":{ - "@id":"dcterms:language", - "@type":"xsd:string" - }, - "publisher":{ - "@id":"dcterms:publisher", - "@type":"xsd:string" - }, - "relation":{ - "@id":"dcterms:relation", - "@type":"xsd:string" - }, 
- "rights":{ - "@id":"dcterms:rights", - "@type":"xsd:anyURI" - }, - "source":{ - "@id":"dcterms:source", - "@type":"xsd:string" - }, - "subject":{ - "@id":"dcterms:subject", - "@type":"xsd:string" - }, - "title": { - "@id":"dcterms:title", - "@type":"xsd:string" - }, - "type":{ - "@id":"dcterms:type", - "@type":"xsd:string" - } - } -} diff --git a/mediagoblin/views.py b/mediagoblin/views.py index 1ed71473..009e48e4 100644 --- a/mediagoblin/views.py +++ b/mediagoblin/views.py @@ -62,9 +62,3 @@ def terms_of_service(request): return render_to_response(request, 'mediagoblin/terms_of_service.html', {}) - -def metadata_context_view(request): - version = request.matchdict['version_number'] - return render_to_response(request, - 'mediagoblin/metadata_contexts/v{version}'.format( - version=version), {}) From 6fa9b06f9a7d9f33b2e891ff615395dfbb20c18e Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Fri, 11 Apr 2014 13:06:09 -0400 Subject: [PATCH 15/18] Fixed incorrectly coded references to filesystem paths --- mediagoblin/gmg_commands/batchaddmedia.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 68993aa2..b058a47e 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -98,10 +98,8 @@ zip files and directories" if dir_path.endswith('/'): dir_path = dir_path[:-1] - location_file_path = "{dir_path}/location.csv".format( - dir_path=dir_path) - metadata_file_path = "{dir_path}/metadata.csv".format( - dir_path=dir_path) + location_file_path = os.path.join(dir_path,"location.csv") + metadata_file_path = os.path.join(dir_path, "metadata.csv") # check for the location file, if it exists... 
location_filename = os.path.split(location_file_path)[-1] @@ -163,9 +161,7 @@ zip files and directories" if os.path.isabs(path): file_abs_path = os.path.abspath(path) else: - file_path = "{dir_path}/{local_path}".format( - dir_path=dir_path, - local_path=path) + file_path = os.path.join(dir_path, path) file_abs_path = os.path.abspath(file_path) try: media_file = file(file_abs_path, 'r') From fbb13abe9a22d08c3a2b86245cf02c1363c36d86 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Tue, 15 Apr 2014 13:35:22 -0400 Subject: [PATCH 16/18] Added the 'requests' library as a dependency and switched over to using it to fetch remote pieces of media in the batchupload script --- mediagoblin/gmg_commands/batchaddmedia.py | 9 ++++----- setup.py | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index b058a47e..deb6c5bd 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -15,9 +15,10 @@ # along with this program. If not, see . import os -import tempfile, urllib, tarfile, zipfile, subprocess +import tempfile, tarfile, zipfile, subprocess, requests from csv import reader as csv_reader from urlparse import urlparse +import requests from pyld import jsonld from mediagoblin.gmg_commands import util as commands_util @@ -151,10 +152,8 @@ zip files and directories" filename = url.path.split()[-1] if url.scheme == 'http': - media_file = tempfile.TemporaryFile() - res = urllib.urlopen(url.geturl()) - media_file.write(res.read()) - media_file.seek(0) + res = requests.get(url.geturl()) + media_file = res.raw elif url.scheme == '': path = url.path diff --git a/setup.py b/setup.py index 93873d73..12739ffd 100644 --- a/setup.py +++ b/setup.py @@ -67,6 +67,7 @@ try: 'oauthlib==0.5.0', 'unidecode', 'jsonschema', + 'requests', ## Annoying. Please remove once we can! 
We only indirectly ## use pbr, and currently it breaks things, presumably till From b91df79041f59ec87ad2e6f48ca6aa2a78de3c1d Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Tue, 15 Apr 2014 13:51:27 -0400 Subject: [PATCH 17/18] Moved the metadata column to MediaEntry rather than MediaFile --- mediagoblin/db/migrations.py | 8 ++++---- mediagoblin/db/models.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mediagoblin/db/migrations.py b/mediagoblin/db/migrations.py index a7400bf0..294ab43b 100644 --- a/mediagoblin/db/migrations.py +++ b/mediagoblin/db/migrations.py @@ -723,12 +723,12 @@ def drop_MediaEntry_collected(db): db.commit() @RegisterMigration(20, MIGRATIONS) -def add_work_metadata_column(db): +def add_metadata_column(db): metadata = MetaData(bind=db.bind) - media_file = inspect_table(metadata, 'core__mediafiles') + media_entry = inspect_table(metadata, 'core__media_entries') - col = Column('work_metadata', MutationDict.as_mutable(JSONEncoded)) - col.create(media_file) + col = Column('metadata', MutationDict.as_mutable(JSONEncoded)) + col.create(media_entry) db.commit() diff --git a/mediagoblin/db/models.py b/mediagoblin/db/models.py index ac69d040..7c0f0bf3 100644 --- a/mediagoblin/db/models.py +++ b/mediagoblin/db/models.py @@ -264,6 +264,7 @@ class MediaEntry(Base, MediaEntryMixin): cascade="all, delete-orphan" ) collections = association_proxy("collections_helper", "in_collection") + metadata = Column(MutationDict.as_mutable(JSONEncoded)) ## TODO # fail_error @@ -420,7 +421,6 @@ class MediaFile(Base): name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False) file_path = Column(PathTupleWithSlashes) file_metadata = Column(MutationDict.as_mutable(JSONEncoded)) - work_metadata = Column(MutationDict.as_mutable(JSONEncoded)) __table_args__ = ( PrimaryKeyConstraint('media_entry', 'name_id'), From 89b6b55766f71466ec001398b2537569543dc175 Mon Sep 17 00:00:00 2001 From: tilly-Q Date: Tue, 15 Apr 2014 14:17:43 -0400 Subject: 
[PATCH 18/18] Changed the name of the metadata column --- mediagoblin/db/migrations.py | 2 +- mediagoblin/db/models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mediagoblin/db/migrations.py b/mediagoblin/db/migrations.py index 294ab43b..8dac3214 100644 --- a/mediagoblin/db/migrations.py +++ b/mediagoblin/db/migrations.py @@ -728,7 +728,7 @@ def add_metadata_column(db): media_entry = inspect_table(metadata, 'core__media_entries') - col = Column('metadata', MutationDict.as_mutable(JSONEncoded)) + col = Column('media_metadata', MutationDict.as_mutable(JSONEncoded)) col.create(media_entry) db.commit() diff --git a/mediagoblin/db/models.py b/mediagoblin/db/models.py index 7c0f0bf3..defa0849 100644 --- a/mediagoblin/db/models.py +++ b/mediagoblin/db/models.py @@ -264,7 +264,7 @@ class MediaEntry(Base, MediaEntryMixin): cascade="all, delete-orphan" ) collections = association_proxy("collections_helper", "in_collection") - metadata = Column(MutationDict.as_mutable(JSONEncoded)) + media_metadata = Column(MutationDict.as_mutable(JSONEncoded)) ## TODO # fail_error