Added multiple a-v streams handling

Before, only files with a single video stream were supported.

This patch adds support for files with multiple video streams. Metadata
of such files is now correctly stored. This required a change to the
schema used to store the info, and that change is done in a migration.
This commit is contained in:
Boris Bobrov 2014-08-08 06:09:28 +04:00
parent 945a1c5667
commit 2d1e89055d
5 changed files with 151 additions and 74 deletions

View File

@ -36,7 +36,11 @@ MEDIA_TYPE = 'mediagoblin.media_types.audio'
def sniff_handler(media_file, filename):
    """Decide whether *media_file* is an audio-only file.

    Runs GStreamer discovery on the file and returns MEDIA_TYPE when at
    least one audio stream and no video streams are found; returns None
    otherwise (including when discovery itself fails).
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))
    try:
        # discover() may raise on corrupt/unreadable files; treat that as
        # "not our media type" instead of crashing the sniffing pass.
        data = discover(media_file.name)
    except Exception as e:
        _log.info(unicode(e))
        return None
    if data and data.get_audio_streams() and not data.get_video_streams():
        return MEDIA_TYPE
    return None

View File

@ -18,6 +18,8 @@ from mediagoblin.db.migration_tools import RegisterMigration, inspect_table
from sqlalchemy import MetaData, Column, Unicode
import json
MIGRATIONS = {}
@ -47,3 +49,62 @@ def webm_640_to_webm_video(db):
values(name='webm_video'))
db.commit()
@RegisterMigration(3, MIGRATIONS)
def change_metadata_format(db):
    """Change orig_metadata format for multi-stream a-v

    Rewrites the JSON stored in every ``video__mediadata`` row: the old
    flat dict (which described at most one audio and one video stream)
    becomes ``{'audio': [...], 'video': [...], 'common': {...}}``, with
    the old single-stream info stored as the first element of each list.
    """
    db_metadata = MetaData(bind=db.bind)
    # Reflect the table as it exists in the DB rather than importing the
    # model, whose definition may already match a newer schema.
    vid_data = inspect_table(db_metadata, "video__mediadata")

    for row in db.execute(vid_data.select()):
        metadata = json.loads(row.orig_metadata)
        if not metadata:
            # nothing stored for this entry; leave the row untouched
            continue

        # before this migration there was info about only one video or audio
        # stream. So, we store existing info as the first item in the list
        new_metadata = {'audio': [], 'video': [], 'common': {}}
        video_key_map = {  # old: new
            'videoheight': 'height',
            'videowidth': 'width',
            'videorate': 'rate',
            }
        audio_key_map = {  # old: new
            'audiochannels': 'channels',
            }
        common_key_map = {
            'videolength': 'length',
            }
        # Only keys that are present with truthy values are carried over.
        new_metadata['video'] = [dict((v, metadata.get(k))
                for k, v in video_key_map.items() if metadata.get(k))]
        new_metadata['audio'] = [dict((v, metadata.get(k))
                for k, v in audio_key_map.items() if metadata.get(k))]
        new_metadata['common'] = dict((v, metadata.get(k))
                for k, v in common_key_map.items() if metadata.get(k))

        # 'mimetype' should be in tags
        # NOTE(review): this stores ``None`` when the old row had no
        # mimetype — presumably harmless downstream, but confirm.
        new_metadata['common']['tags'] = {'mimetype': metadata.get('mimetype')}
        if 'tags' in metadata:
            new_metadata['video'][0]['tags'] = {}
            new_metadata['audio'][0]['tags'] = {}

            tags = metadata['tags']
            # Split the old flat tag dict into per-stream and common tags.
            video_keys = ['encoder', 'encoder-version', 'video-codec']
            audio_keys = ['audio-codec']
            for t, v in tags.items():
                if t in video_keys:
                    new_metadata['video'][0]['tags'][t] = tags[t]
                elif t in audio_keys:
                    new_metadata['audio'][0]['tags'][t] = tags[t]
                else:
                    # everything else is container-level
                    new_metadata['common']['tags'][t] = tags[t]

        db.execute(vid_data.update()
                   .where(vid_data.c.media_entry==row.media_entry)
                   .values(orig_metadata=json.dumps(new_metadata)))
    db.commit()

View File

@ -68,19 +68,18 @@ class VideoData(Base):
"""
orig_metadata = self.orig_metadata or {}
if "webm_video" not in self.get_media_entry.media_files \
and "mimetype" in orig_metadata \
and "tags" in orig_metadata \
and "audio-codec" in orig_metadata["tags"] \
and "video-codec" in orig_metadata["tags"]:
if ("webm_video" not in self.get_media_entry.media_files
and "mimetype" in orig_metadata['common']['tags']
and "codec" in orig_metadata['audio']
and "codec" in orig_metadata['video']):
if orig_metadata['mimetype'] == 'application/ogg':
# stupid ambiguous .ogg extension
mimetype = "video/ogg"
else:
mimetype = orig_metadata['mimetype']
mimetype = orig_metadata['common']['tags']['mimetype']
video_codec = orig_metadata["tags"]["video-codec"].lower()
audio_codec = orig_metadata["tags"]["audio-codec"].lower()
video_codec = orig_metadata["video"]["codec"].lower()
audio_codec = orig_metadata["audio"]["codec"].lower()
# We don't want the "video" at the end of vp8...
# not sure of a nicer way to be cleaning this stuff

View File

@ -73,6 +73,37 @@ def sniff_handler(media_file, filename):
_log.error('Could not discover {0}'.format(filename))
return None
def get_tags(stream_info):
    """Return every tag on *stream_info* as a JSON-serializable dict.

    GDate / GstDateTime values are converted to strings; any remaining
    value of a type json cannot encode is dropped.
    """
    tag_list = stream_info.get_tags()
    if not tag_list:
        return {}

    collected = {}
    tag_list.foreach(
        lambda lst, tag: collected.update(
            {tag: lst.get_value_index(tag, 0)}))

    # GDate -> "year-month-day" string
    if 'date' in collected:
        gdate = collected['date']
        collected['date'] = "%s-%s-%s" % (
            gdate.year, gdate.month, gdate.day)

    # GstDateTime -> ISO-8601 string
    # TODO: handle timezone info; gst.get_time_zone_offset +
    # python's tzinfo should help
    if 'datetime' in collected:
        gdt = collected['datetime']
        collected['datetime'] = datetime.datetime(
            gdt.get_year(), gdt.get_month(), gdt.get_day(), gdt.get_hour(),
            gdt.get_minute(), gdt.get_second(),
            gdt.get_microsecond()).isoformat()

    # keep only the types json accepts; everything else must not be present
    json_safe = (dict, list, basestring, int, float, bool, type(None))
    return dict((key, val) for key, val in collected.items()
                if isinstance(val, json_safe))
def store_metadata(media_entry, metadata):
    """
    Store metadata from this video for this media entry.

    *metadata* is a GStreamer discoverer info object — TODO confirm
    exact type against the caller.  One dict is stored per audio and per
    video stream, plus a 'common' section for container-level info.
    """
    stored_metadata = dict()

    audio_info_list = metadata.get_audio_streams()
    if audio_info_list:
        stored_metadata['audio'] = []
        for audio_info in audio_info_list:
            stored_metadata['audio'].append(
                {
                    'channels': audio_info.get_channels(),
                    'bitrate': audio_info.get_bitrate(),
                    'depth': audio_info.get_depth(),
                    # NOTE(review): key kept as the historical misspelling
                    # 'languange' — stored data may be read back under this
                    # key, so don't rename without a migration.
                    'languange': audio_info.get_language(),
                    'sample_rate': audio_info.get_sample_rate(),
                    'tags': get_tags(audio_info)
                })

    video_info_list = metadata.get_video_streams()
    if video_info_list:
        stored_metadata['video'] = []
        for video_info in video_info_list:
            stored_metadata['video'].append(
                {
                    'width': video_info.get_width(),
                    'height': video_info.get_height(),
                    'bitrate': video_info.get_bitrate(),
                    'depth': video_info.get_depth(),
                    # framerate is a fraction; store as [num, denom] so it
                    # survives the json round-trip
                    'videorate': [video_info.get_framerate_num(),
                                  video_info.get_framerate_denom()],
                    'tags': get_tags(video_info)
                })

    # container-level info shared by all streams
    stored_metadata['common'] = {
        'duration': metadata.get_duration(),
        'tags': get_tags(metadata),
    }

    media_entry.media_data_init(orig_metadata=stored_metadata)
class CommonVideoProcessor(MediaProcessor):
@ -234,7 +246,8 @@ class CommonVideoProcessor(MediaProcessor):
if skip_transcode(metadata, medium_size):
_log.debug('Skipping transcoding')
dst_dimensions = metadata['videowidth'], metadata['videoheight']
dst_dimensions = (metadata.get_video_streams()[0].get_width(),
metadata.get_video_streams()[0].get_height())
# If there is an original and transcoded, delete the transcoded
# since it must be of lower quality then the original

View File

@ -43,23 +43,23 @@ def skip_transcode(metadata, size):
config['container_formats']):
return False
if (config['video_codecs'] and
metadata.get_tags().get_string('video-codec')):
if not (metadata.get_tags().get_string('video-codec') in
config['video_codecs']):
return False
if config['video_codecs']:
for video_info in metadata.get_video_streams():
if not (video_info.get_tags().get_string('video-codec') in
config['video_codecs']):
return False
if (config['audio_codecs'] and
metadata.get_tags().get_string('audio-codec')):
if not (metadata.get_tags().get_string('audio-codec') in
config['audio_codecs']):
return False
if config['audio_codecs']:
for audio_info in metadata.get_audio_streams():
if not (audio_info.get_tags().get_string('audio-codec') in
config['audio_codecs']):
return False
video_info = metadata.get_video_streams()[0]
if config['dimensions_match']:
if not video_info.get_height() <= size[1]:
return False
if not video_info.get_width() <= size[0]:
return False
for video_info in metadata.get_video_streams():
if not video_info.get_height() <= size[1]:
return False
if not video_info.get_width() <= size[0]:
return False
return True