Added multiple a-v streams handling

Before, only files with a single video stream were supported.

This patch adds support for files with multiple video streams. Metadata
of such files is now correctly stored. This required a change to the
schema used to store the info, and that change is done in a migration.
This commit is contained in:
Boris Bobrov 2014-08-08 06:09:28 +04:00
parent 945a1c5667
commit 2d1e89055d
5 changed files with 151 additions and 74 deletions

View File

@ -36,7 +36,11 @@ MEDIA_TYPE = 'mediagoblin.media_types.audio'
def sniff_handler(media_file, filename):
    """Decide whether *media_file* is an audio-only file.

    Runs GStreamer discovery on the file and returns MEDIA_TYPE when at
    least one audio stream and no video streams are found; returns None
    otherwise (including when discovery itself fails).
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))
    try:
        # discover() may raise on corrupt/unreadable files; treat that as
        # "not our media type" instead of crashing the sniffing pass.
        data = discover(media_file.name)
    except Exception as e:
        _log.info(unicode(e))
        return None
    if data and data.get_audio_streams() and not data.get_video_streams():
        return MEDIA_TYPE
    return None

View File

@ -18,6 +18,8 @@ from mediagoblin.db.migration_tools import RegisterMigration, inspect_table
from sqlalchemy import MetaData, Column, Unicode
import json
MIGRATIONS = {}
@ -47,3 +49,62 @@ def webm_640_to_webm_video(db):
values(name='webm_video'))
db.commit()
@RegisterMigration(3, MIGRATIONS)
def change_metadata_format(db):
    """Change orig_metadata format for multi-stream a-v

    Rewrites the JSON stored in every ``video__mediadata`` row: the old
    flat dict (which described at most one audio and one video stream)
    becomes ``{'audio': [...], 'video': [...], 'common': {...}}``, with
    the old single-stream info stored as the first element of each list.
    """
    db_metadata = MetaData(bind=db.bind)
    # Reflect the table as it exists in the DB rather than importing the
    # model, whose definition may already match a newer schema.
    vid_data = inspect_table(db_metadata, "video__mediadata")

    for row in db.execute(vid_data.select()):
        metadata = json.loads(row.orig_metadata)
        if not metadata:
            # nothing stored for this entry; leave the row untouched
            continue

        # before this migration there was info about only one video or audio
        # stream. So, we store existing info as the first item in the list
        new_metadata = {'audio': [], 'video': [], 'common': {}}
        video_key_map = {  # old: new
            'videoheight': 'height',
            'videowidth': 'width',
            'videorate': 'rate',
            }
        audio_key_map = {  # old: new
            'audiochannels': 'channels',
            }
        common_key_map = {
            'videolength': 'length',
            }
        # Only keys that are present with truthy values are carried over.
        new_metadata['video'] = [dict((v, metadata.get(k))
                for k, v in video_key_map.items() if metadata.get(k))]
        new_metadata['audio'] = [dict((v, metadata.get(k))
                for k, v in audio_key_map.items() if metadata.get(k))]
        new_metadata['common'] = dict((v, metadata.get(k))
                for k, v in common_key_map.items() if metadata.get(k))

        # 'mimetype' should be in tags
        # NOTE(review): this stores ``None`` when the old row had no
        # mimetype — presumably harmless downstream, but confirm.
        new_metadata['common']['tags'] = {'mimetype': metadata.get('mimetype')}
        if 'tags' in metadata:
            new_metadata['video'][0]['tags'] = {}
            new_metadata['audio'][0]['tags'] = {}

            tags = metadata['tags']
            # Split the old flat tag dict into per-stream and common tags.
            video_keys = ['encoder', 'encoder-version', 'video-codec']
            audio_keys = ['audio-codec']
            for t, v in tags.items():
                if t in video_keys:
                    new_metadata['video'][0]['tags'][t] = tags[t]
                elif t in audio_keys:
                    new_metadata['audio'][0]['tags'][t] = tags[t]
                else:
                    # everything else is container-level
                    new_metadata['common']['tags'][t] = tags[t]

        db.execute(vid_data.update()
                   .where(vid_data.c.media_entry==row.media_entry)
                   .values(orig_metadata=json.dumps(new_metadata)))
    db.commit()

View File

@ -68,19 +68,18 @@ class VideoData(Base):
"""
orig_metadata = self.orig_metadata or {}
if "webm_video" not in self.get_media_entry.media_files \
and "mimetype" in orig_metadata \
and "tags" in orig_metadata \
and "audio-codec" in orig_metadata["tags"] \
and "video-codec" in orig_metadata["tags"]:
if ("webm_video" not in self.get_media_entry.media_files
and "mimetype" in orig_metadata['common']['tags']
and "codec" in orig_metadata['audio']
and "codec" in orig_metadata['video']):
if orig_metadata['mimetype'] == 'application/ogg':
# stupid ambiguous .ogg extension
mimetype = "video/ogg"
else:
mimetype = orig_metadata['mimetype']
mimetype = orig_metadata['common']['tags']['mimetype']
video_codec = orig_metadata["tags"]["video-codec"].lower()
audio_codec = orig_metadata["tags"]["audio-codec"].lower()
video_codec = orig_metadata["video"]["codec"].lower()
audio_codec = orig_metadata["audio"]["codec"].lower()
# We don't want the "video" at the end of vp8...
# not sure of a nicer way to be cleaning this stuff

View File

@ -73,6 +73,37 @@ def sniff_handler(media_file, filename):
_log.error('Could not discover {0}'.format(filename))
return None
def get_tags(stream_info):
    """Return every tag on *stream_info* as a JSON-serializable dict.

    GDate / GstDateTime values are converted to strings; any remaining
    value of a type json cannot encode is dropped.
    """
    tag_list = stream_info.get_tags()
    if not tag_list:
        return {}

    collected = {}
    tag_list.foreach(
        lambda lst, tag: collected.update(
            {tag: lst.get_value_index(tag, 0)}))

    # GDate -> "year-month-day" string
    if 'date' in collected:
        gdate = collected['date']
        collected['date'] = "%s-%s-%s" % (
            gdate.year, gdate.month, gdate.day)

    # GstDateTime -> ISO-8601 string
    # TODO: handle timezone info; gst.get_time_zone_offset +
    # python's tzinfo should help
    if 'datetime' in collected:
        gdt = collected['datetime']
        collected['datetime'] = datetime.datetime(
            gdt.get_year(), gdt.get_month(), gdt.get_day(), gdt.get_hour(),
            gdt.get_minute(), gdt.get_second(),
            gdt.get_microsecond()).isoformat()

    # keep only the types json accepts; everything else must not be present
    json_safe = (dict, list, basestring, int, float, bool, type(None))
    return dict((key, val) for key, val in collected.items()
                if isinstance(val, json_safe))
def store_metadata(media_entry, metadata):
    """
    Store metadata from this video for this media entry.

    *metadata* is a GStreamer discoverer info object — TODO confirm
    exact type against the caller.  One dict is stored per audio and per
    video stream, plus a 'common' section for container-level info.
    """
    stored_metadata = dict()

    audio_info_list = metadata.get_audio_streams()
    if audio_info_list:
        stored_metadata['audio'] = []
        for audio_info in audio_info_list:
            stored_metadata['audio'].append(
                {
                    'channels': audio_info.get_channels(),
                    'bitrate': audio_info.get_bitrate(),
                    'depth': audio_info.get_depth(),
                    # NOTE(review): key kept as the historical misspelling
                    # 'languange' — stored data may be read back under this
                    # key, so don't rename without a migration.
                    'languange': audio_info.get_language(),
                    'sample_rate': audio_info.get_sample_rate(),
                    'tags': get_tags(audio_info)
                })

    video_info_list = metadata.get_video_streams()
    if video_info_list:
        stored_metadata['video'] = []
        for video_info in video_info_list:
            stored_metadata['video'].append(
                {
                    'width': video_info.get_width(),
                    'height': video_info.get_height(),
                    'bitrate': video_info.get_bitrate(),
                    'depth': video_info.get_depth(),
                    # framerate is a fraction; store as [num, denom] so it
                    # survives the json round-trip
                    'videorate': [video_info.get_framerate_num(),
                                  video_info.get_framerate_denom()],
                    'tags': get_tags(video_info)
                })

    # container-level info shared by all streams
    stored_metadata['common'] = {
        'duration': metadata.get_duration(),
        'tags': get_tags(metadata),
    }

    media_entry.media_data_init(orig_metadata=stored_metadata)
class CommonVideoProcessor(MediaProcessor):
@ -234,7 +246,8 @@ class CommonVideoProcessor(MediaProcessor):
if skip_transcode(metadata, medium_size):
_log.debug('Skipping transcoding')
dst_dimensions = metadata['videowidth'], metadata['videoheight']
dst_dimensions = (metadata.get_video_streams()[0].get_width(),
metadata.get_video_streams()[0].get_height())
# If there is an original and transcoded, delete the transcoded
# since it must be of lower quality then the original

View File

@ -43,23 +43,23 @@ def skip_transcode(metadata, size):
config['container_formats']):
return False
if (config['video_codecs'] and
metadata.get_tags().get_string('video-codec')):
if not (metadata.get_tags().get_string('video-codec') in
config['video_codecs']):
return False
if config['video_codecs']:
for video_info in metadata.get_video_streams():
if not (video_info.get_tags().get_string('video-codec') in
config['video_codecs']):
return False
if (config['audio_codecs'] and
metadata.get_tags().get_string('audio-codec')):
if not (metadata.get_tags().get_string('audio-codec') in
config['audio_codecs']):
return False
if config['audio_codecs']:
for audio_info in metadata.get_audio_streams():
if not (audio_info.get_tags().get_string('audio-codec') in
config['audio_codecs']):
return False
video_info = metadata.get_video_streams()[0]
if config['dimensions_match']:
if not video_info.get_height() <= size[1]:
return False
if not video_info.get_width() <= size[0]:
return False
for video_info in metadata.get_video_streams():
if not video_info.get_height() <= size[1]:
return False
if not video_info.get_width() <= size[0]:
return False
return True