Merge branch 'processing'

This commit is contained in:
Christopher Allan Webber 2011-08-19 12:34:40 -05:00
commit 583351109e
10 changed files with 392 additions and 46 deletions

View File

@ -53,6 +53,7 @@ def mediaentry_mediafiles_main_to_original(database):
collection.save(document)
@RegisterMigration(3)
def mediaentry_remove_thumbnail_file(database):
"""
@ -62,3 +63,32 @@ def mediaentry_remove_thumbnail_file(database):
{'thumbnail_file': {'$exists': True}},
{'$unset': {'thumbnail_file': 1}},
multi=True)
@RegisterMigration(4)
def mediaentry_add_queued_task_id(database):
"""
Add the 'queued_task_id' field for entries that don't have it.
"""
collection = database['media_entries']
collection.update(
{'queued_task_id': {'$exists': False}},
{'$set': {'queued_task_id': None}},
multi=True)
@RegisterMigration(5)
def mediaentry_add_fail_error_and_metadata(database):
"""
Add 'fail_error' and 'fail_metadata' fields to media entries
"""
collection = database['media_entries']
collection.update(
{'fail_error': {'$exists': False}},
{'$set': {'fail_error': None}},
multi=True)
collection.update(
{'fail_metadata': {'$exists': False}},
{'$set': {'fail_metadata': {}}},
multi=True)

View File

@ -162,6 +162,8 @@ class MediaEntry(Document):
queued for processing. This is stored in the mg_globals.queue_store
storage system.
- queued_task_id: celery task id. Use this to fetch the task state.
- media_files: Files relevant to this that have actually been processed
and are available for various types of display. Stored like:
{'thumb': ['dir1', 'dir2', 'pic.png'}
@ -169,6 +171,9 @@ class MediaEntry(Document):
- attachment_files: A list of "attachment" files, ones that aren't
critical to this piece of media but may be usefully relevant to people
viewing the work. (currently unused.)
- fail_error: path to the exception raised
- fail_metadata:
"""
__collection__ = 'media_entries'
@ -188,13 +193,19 @@ class MediaEntry(Document):
# For now let's assume there can only be one main file queued
# at a time
'queued_media_file': [unicode],
'queued_task_id': unicode,
# A dictionary of logical names to filepaths
'media_files': dict,
# The following should be lists of lists, in appropriate file
# record form
'attachment_files': list}
'attachment_files': list,
# If things go badly in processing things, we'll store that
# data here
'fail_error': unicode,
'fail_metadata': dict}
required_fields = [
'uploader', 'created', 'media_type', 'slug']
@ -286,6 +297,13 @@ class MediaEntry(Document):
def uploader(self):
return self.db.User.find_one({'_id': self['uploader']})
def get_fail_exception(self):
"""
Get the exception that's appropriate for this error
"""
if self['fail_error']:
return util.import_component(self['fail_error'])
class MediaComment(Document):
"""

View File

@ -15,11 +15,14 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import Image
from mediagoblin.db.util import ObjectId
from celery.task import task
from mediagoblin import mg_globals as mgg
from contextlib import contextmanager
from celery.task import Task
from celery import registry
from mediagoblin.db.util import ObjectId
from mediagoblin import mg_globals as mgg
from mediagoblin.process_media.errors import BaseProcessingFail, BadMediaFail
THUMB_SIZE = 180, 180
@ -32,6 +35,7 @@ def create_pub_filepath(entry, filename):
unicode(entry['_id']),
filename])
@contextmanager
def closing(callback):
try:
@ -39,19 +43,99 @@ def closing(callback):
finally:
pass
@task
def process_media_initial(media_id):
workbench = mgg.workbench_manager.create_workbench()
entry = mgg.database.MediaEntry.one(
{'_id': ObjectId(media_id)})
################################
# Media processing initial steps
################################
class ProcessMedia(Task):
"""
Pass this entry off for processing.
"""
def run(self, media_id):
"""
Pass the media entry off to the appropriate processing function
(for now just process_image...)
"""
entry = mgg.database.MediaEntry.one(
{'_id': ObjectId(media_id)})
# Try to process, and handle expected errors.
try:
process_image(entry)
except BaseProcessingFail, exc:
mark_entry_failed(entry[u'_id'], exc)
return
entry['state'] = u'processed'
entry.save()
def on_failure(self, exc, task_id, args, kwargs, einfo):
"""
If the processing failed we should mark that in the database.
Assuming that the exception raised is a subclass of BaseProcessingFail,
we can use that to get more information about the failure and store that
for conveying information to users about the failure, etc.
"""
entry_id = args[0]
mark_entry_failed(entry_id, exc)
process_media = registry.tasks[ProcessMedia.name]
def mark_entry_failed(entry_id, exc):
"""
Mark a media entry as having failed in its conversion.
Uses the exception that was raised to mark more information. If the
exception is a derivative of BaseProcessingFail then we can store extra
information that can be useful for users telling them why their media failed
to process.
Args:
- entry_id: The id of the media entry
"""
# Was this a BaseProcessingFail? In other words, was this a
# type of error that we know how to handle?
if isinstance(exc, BaseProcessingFail):
# Looks like yes, so record information about that failure and any
# metadata the user might have supplied.
mgg.database['media_entries'].update(
{'_id': entry_id},
{'$set': {u'state': u'failed',
u'fail_error': exc.exception_path,
u'fail_metadata': exc.metadata}})
else:
# Looks like no, so just mark it as failed and don't record a
# failure_error (we'll assume it wasn't handled) and don't record
# metadata (in fact overwrite it if somehow it had previous info
# here)
mgg.database['media_entries'].update(
{'_id': entry_id},
{'$set': {u'state': u'failed',
u'fail_error': None,
u'fail_metadata': {}}})
def process_image(entry):
"""
Code to process an image
"""
workbench = mgg.workbench_manager.create_workbench()
queued_filepath = entry['queued_media_file']
queued_filename = workbench.localized_file(
mgg.queue_store, queued_filepath,
'source')
thumb = Image.open(queued_filename)
try:
thumb = Image.open(queued_filename)
except IOError:
raise BadMediaFail()
thumb.thumbnail(THUMB_SIZE, Image.ANTIALIAS)
# ensure color mode is compatible with jpg
if thumb.mode != "RGB":
@ -63,11 +147,9 @@ def process_media_initial(media_id):
with closing(thumb_file):
thumb.save(thumb_file, "JPEG", quality=90)
"""
If the size of the original file exceeds the specified size of a `medium`
file, a `medium.jpg` files is created and later associated with the media
entry.
"""
# If the size of the original file exceeds the specified size of a `medium`
# file, a `medium.jpg` files is created and later associated with the media
# entry.
medium = Image.open(queued_filename)
medium_processed = False
@ -101,8 +183,6 @@ def process_media_initial(media_id):
media_files_dict['original'] = original_filepath
if medium_processed:
media_files_dict['medium'] = medium_filepath
entry['state'] = u'processed'
entry.save()
# clean up workbench
workbench.destroy_self()

View File

@ -0,0 +1,44 @@
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from mediagoblin.util import lazy_pass_to_ugettext as _
class BaseProcessingFail(Exception):
"""
Base exception that all other processing failure messages should
subclass from.
You shouldn't call this itself; instead you should subclass it
and provid the exception_path and general_message applicable to
this error.
"""
general_message = u''
@property
def exception_path(self):
return u"%s:%s" % (
self.__class__.__module__, self.__class__.__name__)
def __init__(self, **metadata):
self.metadata = metadata or {}
class BadMediaFail(BaseProcessingFail):
"""
Error that should be raised when an inappropriate file was given
for the media type specified.
"""
general_message = _(u'Invalid file given for media type.')

View File

@ -287,3 +287,15 @@ ul.mediaentry_tags li {
margin: 0px 5px 0px 0px;
padding: 0px;
}
/* media processing panel */
table.media_panel {
width: 100%;
}
table.media_panel th {
font-weight: bold;
padding-bottom: 4px;
}

View File

@ -14,9 +14,10 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import uuid
from os.path import splitext
from cgi import FieldStorage
from string import split
from werkzeug.utils import secure_filename
@ -27,7 +28,7 @@ from mediagoblin.util import (
from mediagoblin.util import pass_to_ugettext as _
from mediagoblin.decorators import require_active_login
from mediagoblin.submit import forms as submit_forms, security
from mediagoblin.process_media import process_media_initial
from mediagoblin.process_media import process_media, mark_entry_failed
from mediagoblin.messages import add_message, SUCCESS
@ -86,10 +87,37 @@ def submit_start(request):
# Add queued filename to the entry
entry['queued_media_file'] = queue_filepath
# We generate this ourselves so we know what the taks id is for
# retrieval later.
# (If we got it off the task's auto-generation, there'd be a risk of
# a race condition when we'd save after sending off the task)
task_id = unicode(uuid.uuid4())
entry['queued_task_id'] = task_id
# Save now so we have this data before kicking off processing
entry.save(validate=True)
# queue it for processing
process_media_initial.delay(unicode(entry['_id']))
# Pass off to processing
#
# (... don't change entry after this point to avoid race
# conditions with changes to the document via processing code)
try:
process_media.apply_async(
[unicode(entry['_id'])], {},
task_id=task_id)
except BaseException as exc:
# The purpose of this section is because when running in "lazy"
# or always-eager-with-exceptions-propagated celery mode that
# the failure handling won't happen on Celery end. Since we
# expect a lot of users to run things in this way we have to
# capture stuff here.
#
# ... not completely the diaper pattern because the exception is
# re-raised :)
mark_entry_failed(entry[u'_id'], exc)
# re-raise the exception
raise
add_message(request, SUCCESS, _('Woohoo! Submitted!'))

View File

@ -0,0 +1,67 @@
{#
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#}
{% extends "mediagoblin/base.html" %}
{% block mediagoblin_content %}
<h1>{% trans %}Media processing panel{% endtrans %}</h1>
<p>
{% trans %}You can track the state of media being processed for your gallery here.{% endtrans %}
</p>
<h2>{% trans %}Media in-processing{% endtrans %}</h2>
{% if processing_entries.count() %}
<table class="media_panel processing">
<tr>
<th>Title</th>
<th>When submitted</th>
<th>Status</th>
</tr>
{% for media_entry in processing_entries %}
<tr>
<td>{{ media_entry['title'] }}</td>
<td>{{ media_entry['created'].strftime("%m-%d-%Y %I:%M %p") }}</td>
<td></td>
</tr>
{% endfor %}
</table>
{% else %}
<p><i>{% trans %}No media in-processing{% endtrans %}</i></p>
{% endif %}
{% if failed_entries.count() %}
<h2>{% trans %}These uploads failed to process:{% endtrans %}</h2>
<table class="media_panel failed">
<tr>
<th>Title</th>
<th>When submitted</th>
<th>Reason for failure</th>
</tr>
{% for media_entry in failed_entries %}
<tr>
<td>{{ media_entry['title'] }}</td>
<td>{{ media_entry['created'].strftime("%m-%d-%Y %I:%M %p") }}</td>
<td>{{ media_entry.get_fail_exception().general_message }}</td>
</tr>
{% endfor %}
</table>
{% endif %}
{% endblock %}

View File

@ -156,7 +156,7 @@ class TestSubmission:
util.clear_test_template_context()
response = self.test_app.post(
'/submit/', {
'title': 'Malicious Upload 2'
'title': 'Malicious Upload 1'
}, upload_files=[(
'file', EVIL_FILE)])
@ -164,33 +164,46 @@ class TestSubmission:
form = context['submit_form']
assert form.file.errors == ['The file doesn\'t seem to be an image!']
# NOTE: The following 2 tests will fail. These can be uncommented
# after http://bugs.foocorp.net/issues/324 is resolved and
# bad files are handled properly.
# NOTE: The following 2 tests will ultimately fail, but they
# *will* pass the initial form submission step. Instead,
# they'll be caught as failures during the processing step.
# Test non-supported file with .jpg extension
# -------------------------------------------
#util.clear_test_template_context()
#response = self.test_app.post(
# '/submit/', {
# 'title': 'Malicious Upload 2'
# }, upload_files=[(
# 'file', EVIL_JPG)])
util.clear_test_template_context()
response = self.test_app.post(
'/submit/', {
'title': 'Malicious Upload 2'
}, upload_files=[(
'file', EVIL_JPG)])
response.follow()
assert_equal(
urlparse.urlsplit(response.location)[2],
'/u/chris/')
#context = util.TEMPLATE_TEST_CONTEXT['mediagoblin/submit/start.html']
#form = context['submit_form']
#assert form.file.errors == ['The file doesn\'t seem to be an image!']
entry = mg_globals.database.MediaEntry.find_one(
{'title': 'Malicious Upload 2'})
assert_equal(entry['state'], 'failed')
assert_equal(
entry['fail_error'],
u'mediagoblin.process_media.errors:BadMediaFail')
# Test non-supported file with .png extension
# -------------------------------------------
#util.clear_test_template_context()
#response = self.test_app.post(
# '/submit/', {
# 'title': 'Malicious Upload 3'
# }, upload_files=[(
# 'file', EVIL_PNG)])
#context = util.TEMPLATE_TEST_CONTEXT['mediagoblin/submit/start.html']
#form = context['submit_form']
#assert form.file.errors == ['The file doesn\'t seem to be an image!']
util.clear_test_template_context()
response = self.test_app.post(
'/submit/', {
'title': 'Malicious Upload 3'
}, upload_files=[(
'file', EVIL_PNG)])
response.follow()
assert_equal(
urlparse.urlsplit(response.location)[2],
'/u/chris/')
entry = mg_globals.database.MediaEntry.find_one(
{'title': 'Malicious Upload 3'})
assert_equal(entry['state'], 'failed')
assert_equal(
entry['fail_error'],
u'mediagoblin.process_media.errors:BadMediaFail')

View File

@ -33,4 +33,8 @@ user_routes = [
controller="mediagoblin.user_pages.views:atom_feed"),
Route('mediagoblin.user_pages.media_post_comment',
'/{user}/m/{media}/comment/add/',
controller="mediagoblin.user_pages.views:media_post_comment")]
controller="mediagoblin.user_pages.views:media_post_comment"),
Route('mediagoblin.user_pages.processing_panel',
'/{user}/panel/',
controller="mediagoblin.user_pages.views:processing_panel"),
]

View File

@ -1,4 +1,4 @@
# GNU MediaGoblin -- federated, autonomous media hosting
# MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
@ -175,3 +175,53 @@ def atom_feed(request):
url=entry.url_for_self(request.urlgen))
return feed.get_response()
@require_active_login
def processing_panel(request):
"""
Show to the user what media is still in conversion/processing...
and what failed, and why!
"""
# Get the user
user = request.db.User.find_one(
{'username': request.matchdict['user'],
'status': 'active'})
# Make sure the user exists and is active
if not user:
return exc.HTTPNotFound()
elif user['status'] != u'active':
return render_to_response(
request,
'mediagoblin/user_pages/user.html',
{'user': user})
# XXX: Should this be a decorator?
#
# Make sure we have permission to access this user's panel. Only
# admins and this user herself should be able to do so.
if not (user[u'_id'] == request.user[u'_id']
or request.user.is_admin):
# No? Let's simply redirect to this user's homepage then.
return redirect(
request, 'mediagoblin.user_pages.user_home',
user=request.matchdict['user'])
# Get media entries which are in-processing
processing_entries = request.db.MediaEntry.find(
{'uploader': user['_id'],
'state': 'processing'}).sort('created', DESCENDING)
# Get media entries which have failed to process
failed_entries = request.db.MediaEntry.find(
{'uploader': user['_id'],
'state': 'failed'}).sort('created', DESCENDING)
# Render to response
return render_to_response(
request,
'mediagoblin/user_pages/processing_panel.html',
{'user': user,
'processing_entries': processing_entries,
'failed_entries': failed_entries})