Add new hook for two-step media type checking

Before uploaded media files were checked by extension. This led to
situations when a plugin can support file with specific extension but
doesn't due to lack of codecs, for example. Since the plugin reported
that it supports uploaded file type, the upload was being declared
successful, but transcoding failed.

The failures were not easy to debug.

The change adds a new hook that could allow two-step checking of the
content. The result of the hook execution returns a tuple with
media type name, manager and a callable sniffer, that can be used to
perform probably expensive checks of the content.

Also the change adds implementation of the hook for video.
This commit is contained in:
Boris Bobrov 2014-07-27 07:25:14 +04:00
parent 067ee13188
commit 54b4b28f84
6 changed files with 127 additions and 76 deletions

View File

@ -23,10 +23,18 @@ from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
_log = logging.getLogger(__name__)
class FileTypeNotSupported(Exception):
pass
class InvalidFileType(Exception):
class TypeNotFound(FileTypeNotSupported):
'''Raised if no mediagoblin plugin supporting this file type was found'''
pass
class MissingComponents(FileTypeNotSupported):
'''Raised if plugin found, but it can't process the file for some reason'''
pass
@ -50,40 +58,30 @@ class MediaManagerBase(object):
return hasattr(self, i)
def sniff_media(media_file, filename):
def sniff_media_contents(media_file, filename):
'''
Iterate through the enabled media types and find those suited
for a certain file.
Check media contents using 'expensive' scanning. For example, for video it
is checking the contents using gstreamer
:param media_file: file-like object with 'name' attribute
:param filename: expected filename of the media
'''
try:
return get_media_type_and_manager(filename)
except FileTypeNotSupported:
_log.info('No media handler found by file extension. Doing it the expensive way...')
# Create a temporary file for sniffers suchs as GStreamer-based
# Audio video
tmp_media_file = tempfile.NamedTemporaryFile()
tmp_media_file.write(media_file.read())
tmp_media_file.seek(0)
media_file.seek(0)
media_type = hook_handle('sniff_handler', tmp_media_file, filename)
if media_type:
_log.info('{0} accepts the file'.format(media_type))
return media_type, hook_handle(('media_manager', media_type))
else:
_log.debug('{0} did not accept the file'.format(media_type))
raise FileTypeNotSupported(
# TODO: Provide information on which file types are supported
_(u'Sorry, I don\'t support that file type :('))
media_type = hook_handle('sniff_handler', media_file, filename)
if media_type:
_log.info('{0} accepts the file'.format(media_type))
return media_type, hook_handle(('media_manager', media_type))
else:
_log.debug('{0} did not accept the file'.format(media_type))
raise FileTypeNotSupported(
# TODO: Provide information on which file types are supported
_(u'Sorry, I don\'t support that file type :('))
def get_media_type_and_manager(filename):
'''
Try to find the media type based on the file name, extension
specifically. This is used as a speedup, the sniffing functionality
then falls back on more in-depth bitsniffing of the source file.
This hook is deprecated, 'type_match_handler' should be used instead
'''
if filename.find('.') > 0:
# Get the file extension
@ -97,5 +95,67 @@ def get_media_type_and_manager(filename):
_log.info('File {0} has no file extension, let\'s hope the sniffers get it.'.format(
filename))
raise FileTypeNotSupported(
raise TypeNotFound(
_(u'Sorry, I don\'t support that file type :('))
def type_match_handler(media_file, filename):
'''Check media file by name and then by content
Try to find the media type based on the file name, extension
specifically. After that, if media type is one of supported ones, check the
contents of the file
'''
if filename.find('.') > 0:
# Get the file extension
ext = os.path.splitext(filename)[1].lower()
# Omit the dot from the extension and match it against
# the media manager
hook_result = hook_handle('type_match_handler', ext[1:])
if hook_result:
_log.info('Info about file found, checking further')
MEDIA_TYPE, Manager, sniffer = hook_result
if not sniffer:
_log.debug('sniffer is None, plugin trusts the extension')
return MEDIA_TYPE, Manager
_log.info('checking the contents with sniffer')
try:
sniffer(media_file)
_log.info('checked, found')
return MEDIA_TYPE, Manager
except Exception as e:
_log.info('sniffer says it will not accept the file')
_log.debug(e)
raise
else:
_log.info('No plugins handled extension {0}'.format(ext))
else:
_log.info('File {0} has no known file extension, let\'s hope '
'the sniffers get it.'.format(filename))
raise TypeNotFound(_(u'Sorry, I don\'t support that file type :('))
def sniff_media(media_file, filename):
'''
Iterate through the enabled media types and find those suited
for a certain file.
'''
# copy the contents to a .name-enabled temporary file for further checks
# TODO: there are cases when copying is not required
tmp_media_file = tempfile.NamedTemporaryFile()
media_file.save(tmp_media_file.name)
media_file.seek(0)
try:
return type_match_handler(tmp_media_file, filename)
except TypeNotFound as e:
_log.info('No plugins using two-step checking found')
# keep trying, using old `get_media_type_and_manager`
try:
return get_media_type_and_manager(filename)
except TypeNotFound as e:
# again, no luck. Do it expensive way
_log.info('No media handler found by file extension')
_log.info('Doing it the expensive way...')
return sniff_media_contents(tmp_media_file, filename)

View File

@ -39,10 +39,4 @@ def discover(src):
_log.info('Discovering {0}...'.format(src))
uri = 'file://{0}'.format(src)
discoverer = GstPbutils.Discoverer.new(60 * Gst.SECOND)
try:
info = discoverer.discover_uri(uri)
except GLib.GError as e:
_log.warning(u'Exception: {0}'.format(e))
info = None
_log.info('Done')
return info
return discoverer.discover_uri(uri)

View File

@ -15,8 +15,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from mediagoblin.media_types import MediaManagerBase
from mediagoblin.media_types.video.processing import VideoProcessingManager, \
sniff_handler
from mediagoblin.media_types.video.processing import (VideoProcessingManager,
sniff_handler, sniffer)
MEDIA_TYPE = 'mediagoblin.media_types.video'
@ -38,8 +38,12 @@ def get_media_type_and_manager(ext):
if ext in ACCEPTED_EXTENSIONS:
return MEDIA_TYPE, VideoMediaManager
def type_match_handler(ext):
if ext in ACCEPTED_EXTENSIONS:
return MEDIA_TYPE, VideoMediaManager, sniffer
hooks = {
'get_media_type_and_manager': get_media_type_and_manager,
'type_match_handler': type_match_handler,
'sniff_handler': sniff_handler,
('media_manager', MEDIA_TYPE): lambda: VideoMediaManager,
('reprocess_manager', MEDIA_TYPE): lambda: VideoProcessingManager,

View File

@ -27,6 +27,7 @@ from mediagoblin.processing import (
get_process_filename, store_public,
copy_original)
from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
from mediagoblin.media_types import MissingComponents
from . import transcoders
from .util import skip_transcode
@ -44,24 +45,34 @@ class VideoTranscodingFail(BaseProcessingFail):
general_message = _(u'Video transcoding failed')
EXCLUDED_EXTS = ["nef", "cr2"]
def sniff_handler(media_file, filename):
data = transcoders.discover(media_file.name)
def sniffer(media_file):
'''New style sniffer, used in two-steps check; requires to have .name'''
_log.info('Sniffing {0}'.format(MEDIA_TYPE))
try:
data = transcoders.discover(media_file.name)
except Exception as e:
# this is usually GLib.GError, but we don't really care which one
_log.debug(u'GStreamer: {0}'.format(unicode(e)))
raise MissingComponents(u'GStreamer: {0}'.format(unicode(e)))
_log.debug('Discovered: {0}'.format(data))
if not data:
if not data.get_video_streams():
raise MissingComponents('No video streams found in this video')
if data.get_result() != 0: # it's 0 if success
name = data.get_misc().get_string('name') # XXX: is there always name?
raise MissingComponents(u'{0} is missing'.format(name))
return MEDIA_TYPE
def sniff_handler(media_file, filename):
try:
return sniffer(media_file)
except:
_log.error('Could not discover {0}'.format(filename))
return None
if data.get_video_streams():
return MEDIA_TYPE
return None
def store_metadata(media_entry, metadata):
"""
Store metadata from this video for this media entry.
@ -212,6 +223,7 @@ class CommonVideoProcessor(MediaProcessor):
# Extract metadata and keep a record of it
metadata = transcoders.discover(self.process_filename)
# metadata's stream info here is a DiscovererContainerInfo instance,
# it gets split into DiscovererAudioInfo and DiscovererVideoInfo;
# metadata itself has container-related data in tags, like video-codec

View File

@ -26,8 +26,7 @@ from mediagoblin.tools.translate import pass_to_ugettext as _
from mediagoblin.tools.response import json_response
from mediagoblin.decorators import require_active_login
from mediagoblin.meddleware.csrf import csrf_exempt
from mediagoblin.media_types import \
InvalidFileType, FileTypeNotSupported
from mediagoblin.media_types import FileTypeNotSupported
from mediagoblin.plugins.api.tools import api_auth, get_entry_serializable
from mediagoblin.submit.lib import \
check_file_field, submit_media, get_upload_file_limits, \
@ -83,17 +82,8 @@ def post_entry(request):
except UserPastUploadLimit:
raise BadRequest(
_('Sorry, you have reached your upload limit.'))
except Exception as e:
'''
This section is intended to catch exceptions raised in
mediagoblin.media_types
'''
if isinstance(e, InvalidFileType) or \
isinstance(e, FileTypeNotSupported):
raise BadRequest(six.text_type(e))
else:
raise
except FileTypeNotSupported as e:
raise BadRequest(e)
@api_auth

View File

@ -29,8 +29,7 @@ from mediagoblin.tools.response import render_to_response, redirect
from mediagoblin.decorators import require_active_login, user_has_privilege
from mediagoblin.submit import forms as submit_forms
from mediagoblin.messages import add_message, SUCCESS
from mediagoblin.media_types import \
InvalidFileType, FileTypeNotSupported
from mediagoblin.media_types import FileTypeNotSupported
from mediagoblin.submit.lib import \
check_file_field, submit_media, get_upload_file_limits, \
FileUploadLimit, UserUploadLimit, UserPastUploadLimit
@ -89,18 +88,10 @@ def submit_start(request):
_('Sorry, you have reached your upload limit.'))
return redirect(request, "mediagoblin.user_pages.user_home",
user=request.user.username)
except FileTypeNotSupported as e:
submit_form.file.errors.append(e)
except Exception as e:
'''
This section is intended to catch exceptions raised in
mediagoblin.media_types
'''
if isinstance(e, InvalidFileType) or \
isinstance(e, FileTypeNotSupported):
submit_form.file.errors.append(
e)
else:
raise
raise
return render_to_response(
request,