From 7e266d5a374fe16355375f82be45e2c5707450d1 Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Thu, 29 May 2014 14:50:32 +0400
Subject: [PATCH 01/13] Rewrite thumbnailer

The previous thumbnailer didn't always work properly. It was also not
ready to be ported to GStreamer 1.0.

The rewrite makes it shorter, more Pythonic and prepares it for porting.

 - no longer uses playbin2
 - is tested
 - logs some events
 - removes the previous thumbnailer
---
 mediagoblin/media_types/video/processing.py  |   9 +-
 mediagoblin/media_types/video/transcoders.py | 393 +++----------------
 mediagoblin/tests/test_video.py              |  71 ++++
 3 files changed, 143 insertions(+), 330 deletions(-)
 create mode 100644 mediagoblin/tests/test_video.py

diff --git a/mediagoblin/media_types/video/processing.py b/mediagoblin/media_types/video/processing.py
index bbed4f12..ca9a6ad9 100644
--- a/mediagoblin/media_types/video/processing.py
+++ b/mediagoblin/media_types/video/processing.py
@@ -74,6 +74,13 @@ def store_metadata(media_entry, metadata):
     """
     Store metadata from this video for this media entry.
     """
+    stored_metadata = dict()
+    audio_info_list = metadata.get_audio_streams()
+    if audio_info_list:
+        audio_info = audio_info_list[0]
+        stored_metadata['audiochannels'] = audio_info.get_channels()
+    # video is always there
+    video_info = metadata.get_video_streams()[0]
     # Let's pull out the easy, not having to be converted ones first
     stored_metadata = dict(
         [(key, metadata[key])
@@ -270,7 +277,7 @@ class CommonVideoProcessor(MediaProcessor):
             return
 
         # We will only use the width so that the correct scale is kept
-        transcoders.VideoThumbnailerMarkII(
+        transcoders.capture_thumb(
             self.process_filename,
             tmp_thumb,
             thumb_size[0])
diff --git a/mediagoblin/media_types/video/transcoders.py b/mediagoblin/media_types/video/transcoders.py
index 3a3fa97f..e08b897c 100644
--- a/mediagoblin/media_types/video/transcoders.py
+++ b/mediagoblin/media_types/video/transcoders.py
@@ -55,336 +55,73 @@ os.putenv('GST_DEBUG_DUMP_DOT_DIR', '/tmp')
 
 def pixbuf_to_pilbuf(buf):
     data = list()
-    for i in range(0, len(buf), 3):
-        r, g, b = struct.unpack('BBB', buf[i:i + 3])
+    for i in range(0, len(buf)-4, 4):
+        r, g, b, x = struct.unpack('BBBB', buf[i:i + 4])
+        # XXX: can something be done with the 'X' part of RGBX?
         data.append((r, g, b))
-
     return data
 
-
-class VideoThumbnailerMarkII(object):
-    '''
-    Creates a thumbnail from a video file. Rewrite of VideoThumbnailer.
-
-    Large parts of the functionality and overall architectue contained within
-    this object is taken from Participatory Culture Foundation's
-    `gst_extractor.Extractor` object last seen at
-    https://github.com/pculture/miro/blob/master/tv/lib/frontends/widgets/gst/gst_extractor.py
-    in the `miro` codebase.
-
-    The `miro` codebase and the gst_extractor.py are licensed under the GNU
-    General Public License v2 or later.
-    '''
-    STATE_NULL = 0
-    STATE_HALTING = 1
-    STATE_PROCESSING = 2
-    STATE_PROCESSING_THUMBNAIL = 3
-
-    def __init__(self, source_path, dest_path, width=None, height=None,
-            position_callback=None):
-        self.state = self.STATE_NULL
-
-        self.has_reached_playbin_pause = False
-
-        self.thumbnail_pipeline = None
-
-        self.permission_to_take_picture = False
-
-        self.buffer_probes = {}
-
-        self.errors = []
-
-        self.source_path = os.path.abspath(source_path)
-        self.dest_path = os.path.abspath(dest_path)
-
-        self.width = width
-        self.height = height
-        self.position_callback = position_callback \
-                or self.wadsworth_position_callback
-
-        self.mainloop = gobject.MainLoop()
-
-        self.playbin = gst.element_factory_make('playbin')
-
-        self.videosink = gst.element_factory_make('fakesink', 'videosink')
-        self.audiosink = gst.element_factory_make('fakesink', 'audiosink')
-
-        self.playbin.set_property('video-sink', self.videosink)
-        self.playbin.set_property('audio-sink', self.audiosink)
-
-        self.playbin_message_bus = self.playbin.get_bus()
-
-        self.playbin_message_bus.add_signal_watch()
-        self.playbin_bus_watch_id = self.playbin_message_bus.connect(
-                'message',
-                self.on_playbin_message)
-
-        self.playbin.set_property(
-                'uri',
-                'file:{0}'.format(
-                    urllib.pathname2url(self.source_path)))
-
-        self.playbin.set_state(gst.STATE_PAUSED)
-
-        try:
-            self.run()
-        except Exception as exc:
-            _log.critical(
-                'Exception "{0}" caught, shutting down mainloop and re-raising'\
-                    .format(exc))
-            self.disconnect()
-            raise
-
-    def wadsworth_position_callback(self, duration, gst):
-        return self.duration / 100 * 30
-
-    def run(self):
-        self.mainloop.run()
-
-    def on_playbin_message(self, message_bus, message):
-        # Silenced to prevent clobbering of output
-        #_log.debug('playbin message: {0}'.format(message))
-
-        if message.type == gst.MESSAGE_ERROR:
-            _log.error('playbin error: {0}'.format(message))
-            gobject.idle_add(self.on_playbin_error)
-
-        if message.type == gst.MESSAGE_STATE_CHANGED:
-            prev_state, cur_state, pending_state = \
-                    message.parse_state_changed()
-
-            _log.debug('playbin state changed: \nprev: {0}\ncur: {1}\n \
-pending: {2}'.format(
-    prev_state,
-    cur_state,
-    pending_state))
-
-            if cur_state == gst.STATE_PAUSED:
-                if message.src == self.playbin:
-                    _log.info('playbin ready')
-                    gobject.idle_add(self.on_playbin_paused)
-
-    def on_playbin_paused(self):
-        if self.has_reached_playbin_pause:
-            _log.warn('Has already reached on_playbin_paused. Aborting \
-without doing anything this time.')
-            return False
-
-        self.has_reached_playbin_pause = True
-
-        # XXX: Why is this even needed at this point?
-        current_video = self.playbin.get_property('current-video')
-
-        if not current_video:
-            _log.critical('Could not get any video data \
-from playbin')
-        else:
-            _log.info('Got video data from playbin')
-
-        self.duration = self.get_duration(self.playbin)
-        self.permission_to_take_picture = True
-        self.buffer_probes = {}
-
-        pipeline = ''.join([
-            'filesrc location="%s" ! decodebin2 ! ' % self.source_path,
-            'ffmpegcolorspace ! videoscale ! ',
-            'video/x-raw-rgb,depth=24,bpp=24,pixel-aspect-ratio=1/1',
-            ',width={0}'.format(self.width) if self.width else '',
-            ',height={0}'.format(self.height) if self.height else '',
-            ' ! ',
-            'fakesink signal-handoffs=True'])
-
-        _log.debug('thumbnail_pipeline: {0}'.format(pipeline))
-
-        self.thumbnail_pipeline = gst.parse_launch(pipeline)
-        self.thumbnail_message_bus = self.thumbnail_pipeline.get_bus()
-        self.thumbnail_message_bus.add_signal_watch()
-        self.thumbnail_bus_watch_id = self.thumbnail_message_bus.connect(
-                'message',
-                self.on_thumbnail_message)
-
-        self.thumbnail_pipeline.set_state(gst.STATE_PAUSED)
-
-        gobject.timeout_add(3000, self.on_gobject_timeout)
-
-        return False
-
-    def on_thumbnail_message(self, message_bus, message):
-        # This is silenced to prevent clobbering of the terminal window
-        #_log.debug('thumbnail message: {0}'.format(message))
-
-        if message.type == gst.MESSAGE_ERROR:
-            _log.error('thumbnail error: {0}'.format(message.parse_error()))
-            gobject.idle_add(self.on_thumbnail_error, message)
-
-        if message.type == gst.MESSAGE_STATE_CHANGED:
-            prev_state, cur_state, pending_state = \
-                    message.parse_state_changed()
-
-            _log.debug('thumbnail state changed: \nprev: {0}\ncur: {1}\n \
-pending: {2}'.format(
-    prev_state,
-    cur_state,
-    pending_state))
-
-            if cur_state == gst.STATE_PAUSED and \
-               not self.state == self.STATE_PROCESSING_THUMBNAIL:
-                # Find the fakesink sink pad and attach the on_buffer_probe
-                # handler to it.
-                seek_amount = self.position_callback(self.duration, gst)
-
-                seek_result = self.thumbnail_pipeline.seek(
-                        1.0,
-                        gst.FORMAT_TIME,
-                        gst.SEEK_FLAG_FLUSH | gst.SEEK_FLAG_ACCURATE,
-                        gst.SEEK_TYPE_SET,
-                        seek_amount,
-                        gst.SEEK_TYPE_NONE,
-                        0)
-
-                if not seek_result:
-                    _log.info('Could not seek.')
-                else:
-                    _log.info('Seek successful, attaching buffer probe')
-                    self.state = self.STATE_PROCESSING_THUMBNAIL
-                    for sink in self.thumbnail_pipeline.sinks():
-                        sink_name = sink.get_name()
-                        sink_factory_name = sink.get_factory().get_name()
-
-                        if sink_factory_name == 'fakesink':
-                            sink_pad = sink.get_pad('sink')
-
-                            self.buffer_probes[sink_name] = sink_pad\
-                                    .add_buffer_probe(
-                                            self.on_pad_buffer_probe,
-                                            sink_name)
-
-                            _log.info('Attached buffer probes: {0}'.format(
-                                self.buffer_probes))
-
-                            break
-
-
-            elif self.state == self.STATE_PROCESSING_THUMBNAIL:
-                _log.info('Already processing thumbnail')
-
-    def on_pad_buffer_probe(self, *args):
-        _log.debug('buffer probe handler: {0}'.format(args))
-        gobject.idle_add(lambda: self.take_snapshot(*args))
-
-    def take_snapshot(self, pad, buff, name):
-        if self.state == self.STATE_HALTING:
-            _log.debug('Pipeline is halting, will not take snapshot')
-            return False
-
-        _log.info('Taking snapshot! ({0})'.format(
-            (pad, buff, name)))
-        try:
-            caps = buff.caps
-            if caps is None:
-                _log.error('No buffer caps present /take_snapshot')
-                self.disconnect()
-
-            _log.debug('caps: {0}'.format(caps))
-
-            filters = caps[0]
-            width = filters['width']
-            height = filters['height']
-
-            im = Image.new('RGB', (width, height))
-
-            data = pixbuf_to_pilbuf(buff.data)
-
-            im.putdata(data)
-
-            im.save(self.dest_path)
-
-            _log.info('Saved snapshot!')
-
-            self.disconnect()
-
-        except gst.QueryError as exc:
-            _log.error('take_snapshot - QueryError: {0}'.format(exc))
-
-        return False
-
-    def on_thumbnail_error(self, message):
-        scaling_failed = False
-
-        if 'Error calculating the output scaled size - integer overflow' \
-           in message.parse_error()[1]:
-            # GStreamer videoscale sometimes fails to calculate the dimensions
-            # given only one of the destination dimensions and the source
-            # dimensions. This is a workaround in case videoscale returns an
-            # error that indicates this has happened.
-            scaling_failed = True
-            _log.error('Thumbnailing failed because of videoscale integer'
-                       ' overflow. Will retry with fallback.')
-        else:
-            _log.error('Thumbnailing failed: {0}'.format(message.parse_error()))
-
-        # Kill the current mainloop
-        self.disconnect()
-
-        if scaling_failed:
-            # Manually scale the destination dimensions
-            _log.info('Retrying with manually set sizes...')
-
-            info = VideoTranscoder().discover(self.source_path)
-
-            h = info['videoheight']
-            w = info['videowidth']
-            ratio = 180 / int(w)
-            h = int(h * ratio)
-
-            self.__init__(self.source_path, self.dest_path, 180, h)
-
-    def disconnect(self):
-        self.state = self.STATE_HALTING
-
-        if self.playbin is not None:
-            self.playbin.set_state(gst.STATE_NULL)
-
-            for sink in self.playbin.sinks():
-                sink_name = sink.get_name()
-                sink_factory_name = sink.get_factory().get_name()
-
-                if sink_factory_name == 'fakesink':
-                    sink_pad = sink.get_pad('sink')
-                    sink_pad.remove_buffer_probe(self.buffer_probes[sink_name])
-                    del self.buffer_probes[sink_name]
-
-            self.playbin = None
-
-        if self.thumbnail_pipeline is not None:
-            self.thumbnail_pipeline.set_state(gst.STATE_NULL)
-            self.thumbnail_pipeline = None
-
-        if self.playbin_message_bus is not None:
-            self.playbin_message_bus.disconnect(self.playbin_bus_watch_id)
-            self.playbin_message_bus = None
-
-        self.halt()
-
-    def halt(self):
-        gobject.idle_add(self.mainloop.quit)
-
-    def on_gobject_timeout(self):
-        _log.critical('Reached gobject timeout')
-        self.disconnect()
-
-    def get_duration(self, pipeline, attempt=1):
-        if attempt == 5:
-            _log.critical('Pipeline duration query retry limit reached.')
-            return 0
-
-        try:
-            return pipeline.query_duration(gst.FORMAT_TIME)[0]
-        except gst.QueryError as exc:
-            _log.error('Could not get duration on attempt {0}: {1}'.format(
-                attempt,
-                exc))
-            return self.get_duration(pipeline, attempt + 1)
+def capture_thumb(video_path, dest_path, width=None, height=None, percent=0.5):
+    def pad_added(element, pad, connect_to):
+        caps = pad.get_caps()
+        name = caps[0].get_name()
+        _log.debug('on_pad_added: {0}'.format(name))
+        if name.startswith('video') and not connect_to.is_linked():
+            pad.link(connect_to)
+    # construct pipeline: uridecodebin ! ffmpegcolorspace ! videoscale ! \
+    # ! CAPS ! appsink
+    pipeline = gst.Pipeline()
+    uridecodebin = gst.element_factory_make('uridecodebin')
+    uridecodebin.set_property('uri', 'file://{0}'.format(video_path))
+    ffmpegcolorspace = gst.element_factory_make('ffmpegcolorspace')
+    uridecodebin.connect('pad-added', pad_added,
+                         ffmpegcolorspace.get_pad('sink'))
+    videoscale = gst.element_factory_make('videoscale')
+    filter = gst.element_factory_make('capsfilter', 'filter')
+    # create caps for video scaling
+    caps_struct = gst.Structure('video/x-raw-rgb')
+    caps_struct.set_value('pixel-aspect-ratio', gst.Fraction(1, 1))
+    if height:
+        caps_struct.set_value('height', height)
+    if width:
+        caps_struct.set_value('width', width)
+    caps = gst.Caps(caps_struct)
+    filter.set_property('caps', caps)
+    appsink = gst.element_factory_make('appsink')
+    pipeline.add(uridecodebin, ffmpegcolorspace, videoscale, filter, appsink)
+    gst.element_link_many(ffmpegcolorspace, videoscale, filter, appsink)
+    # pipeline constructed, starting playing, but first some preparations
+    if pipeline.set_state(gst.STATE_PAUSED) == gst.STATE_CHANGE_FAILURE:
+        _log.warning('state change failed')
+    pipeline.get_state()
+    duration = pipeline.query_duration(gst.FORMAT_TIME, None)[0]
+    if duration == gst.CLOCK_TIME_NONE:
+        _log.warning('query_duration failed')
+        duration = 0  # XXX
+    seek_to = int(duration * int(percent * 100) / 100)
+    _log.debug('Seeking to {0} of {1}'.format(
+            seek_to / gst.SECOND, duration / gst.SECOND))
+    seek = pipeline.seek_simple(gst.FORMAT_TIME, gst.SEEK_FLAG_FLUSH, seek_to)
+    if not seek:
+        _log.warning('seek failed')
+    # get sample, retrieve it's format and save
+    sample = appsink.emit("pull-preroll")
+    if not sample:
+        _log.warning('could not get sample')
+        return
+    caps = sample.get_caps()
+    if not caps:
+        _log.warning('could not get snapshot format')
+    structure = caps.get_structure(0)
+    (success, width) = structure.get_int('width')
+    (success, height) = structure.get_int('height')
+    buffer = sample.get_buffer()
+    im = Image.frombytes('RGB', (width, height),
+                         buffer.extract_dup(0, buffer.get_size()))
+    im.save(dest_path)
+    _log.info('thumbnail saved to {0}'.format(dest_path))
+    # cleanup
+    pipeline.set_state(gst.STATE_NULL)
 
 
 class VideoTranscoder(object):
@@ -451,7 +188,6 @@ class VideoTranscoder(object):
         self.discoverer.discover()
 
         self.loop.run()
-
         if hasattr(self, '_discovered_data'):
             return self._discovered_data.__dict__
         else:
@@ -729,7 +465,6 @@ class VideoTranscoder(object):
 
 if __name__ == '__main__':
     os.nice(19)
-    logging.basicConfig()
     from optparse import OptionParser
 
     parser = OptionParser(
diff --git a/mediagoblin/tests/test_video.py b/mediagoblin/tests/test_video.py
new file mode 100644
index 00000000..0fe58f60
--- /dev/null
+++ b/mediagoblin/tests/test_video.py
@@ -0,0 +1,71 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2013 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import tempfile
+import shutil
+import os
+import pytest
+from contextlib import contextmanager
+import logging
+import imghdr
+
+#TODO: this should be skipped if video plugin is not enabled
+import pygst
+pygst.require('0.10')
+import gst
+
+from mediagoblin.media_types.video.transcoders import capture_thumb
+
+@contextmanager
+def create_data(suffix):
+    video = tempfile.NamedTemporaryFile()
+    src = gst.element_factory_make('videotestsrc')
+    src.set_property('num-buffers', 50)
+    enc = gst.element_factory_make('theoraenc')
+    mux = gst.element_factory_make('oggmux')
+    dst = gst.element_factory_make('filesink')
+    dst.set_property('location', video.name)
+    pipeline = gst.Pipeline()
+    pipeline.add(src, enc, mux, dst)
+    gst.element_link_many(src, enc, mux, dst)
+    pipeline.set_state(gst.STATE_PLAYING)
+    # wait for finish
+    bus = pipeline.get_bus()
+    message = bus.timed_pop_filtered(gst.CLOCK_TIME_NONE,
+                                     gst.MESSAGE_ERROR | gst.MESSAGE_EOS)
+    thumb = tempfile.NamedTemporaryFile(suffix=suffix)
+    pipeline.set_state(gst.STATE_NULL)
+    yield (video.name, thumb.name)
+
+
+#TODO: this should be skipped if video plugin is not enabled
+def test_thumbnails():
+    '''
+    Test thumbnails generation.
+    1. Create a video from gst's videotestsrc
+    3. Capture thumbnail
+    4. Remove it
+    '''
+    #data  create_data() as (video_name, thumbnail_name):
+    test_formats = [('.png', 'png'), ('.jpg', 'jpeg'), ('.gif', 'gif')]
+    for suffix, format in test_formats:
+        with create_data(suffix) as (video_name, thumbnail_name):
+            capture_thumb(video_name, thumbnail_name, width=40)
+            # check if png
+            assert imghdr.what(thumbnail_name) == format
+            # TODO: check height and width
+            # FIXME: it doesn't work with small width, say, 10px. This should be
+            # fixed somehow

From 91f5f5e791e5bc3680cac2b0103429713517f7d4 Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Thu, 5 Jun 2014 15:42:12 +0400
Subject: [PATCH 02/13] Porting video to GStreamer 1.0

Porting includes:
 - thumbnailer
 - transcoder
 - metadata handling
 - new common discoverer for media
 - new tests with in-memory test video generation
 - handling of files regardless of whether they contain audio
 - Pythonic gst pipelines
---
 mediagoblin/media_types/tools.py             |  21 ++
 mediagoblin/media_types/video/processing.py  |  70 ++--
 mediagoblin/media_types/video/transcoders.py | 367 +++++++------------
 mediagoblin/media_types/video/util.py        |  28 +-
 mediagoblin/processing/__init__.py           |   5 +-
 mediagoblin/tests/test_video.py              | 116 ++++--
 6 files changed, 301 insertions(+), 306 deletions(-)

diff --git a/mediagoblin/media_types/tools.py b/mediagoblin/media_types/tools.py
index fe7b3772..0822f51c 100644
--- a/mediagoblin/media_types/tools.py
+++ b/mediagoblin/media_types/tools.py
@@ -17,6 +17,11 @@ import logging
 
 from mediagoblin import mg_globals
 
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import GObject, Gst, GstPbutils, GLib
+Gst.init(None)
+
 _log = logging.getLogger(__name__)
 
 
@@ -25,3 +30,19 @@ def media_type_warning():
         _log.warning('Media_types have been converted to plugins. Old'
                      ' media_types will no longer work. Please convert them'
                      ' to plugins to continue using them.')
+
+
+def discover(src):
+    '''
+    Discover properties about a media file
+    '''
+    _log.info('Discovering {0}...'.format(src))
+    uri = 'file://{0}'.format(src)
+    discoverer = GstPbutils.Discoverer.new(60 * Gst.SECOND)
+    try:
+        info = discoverer.discover_uri(uri)
+    except GLib.GError as e:
+        _log.warning(u'Exception: {0}'.format(e))
+        info = None
+    _log.info('Done')
+    return info
diff --git a/mediagoblin/media_types/video/processing.py b/mediagoblin/media_types/video/processing.py
index ca9a6ad9..588af282 100644
--- a/mediagoblin/media_types/video/processing.py
+++ b/mediagoblin/media_types/video/processing.py
@@ -47,15 +47,7 @@ class VideoTranscodingFail(BaseProcessingFail):
 EXCLUDED_EXTS = ["nef", "cr2"]
 
 def sniff_handler(media_file, filename):
-    name, ext = os.path.splitext(filename)
-    clean_ext = ext.lower()[1:]
-
-    if clean_ext in EXCLUDED_EXTS:
-        # We don't handle this filetype, though gstreamer might think we can
-        return None
-
-    transcoder = transcoders.VideoTranscoder()
-    data = transcoder.discover(media_file.name)
+    data = transcoders.discover(media_file.name)
 
     _log.info('Sniffing {0}'.format(MEDIA_TYPE))
     _log.debug('Discovered: {0}'.format(data))
@@ -64,7 +56,7 @@ def sniff_handler(media_file, filename):
         _log.error('Could not discover {0}'.format(filename))
         return None
 
-    if data['is_video'] is True:
+    if data.get_video_streams():
         return MEDIA_TYPE
 
     return None
@@ -82,51 +74,50 @@ def store_metadata(media_entry, metadata):
     # video is always there
     video_info = metadata.get_video_streams()[0]
     # Let's pull out the easy, not having to be converted ones first
-    stored_metadata = dict(
-        [(key, metadata[key])
-         for key in [
-             "videoheight", "videolength", "videowidth",
-             "audiorate", "audiolength", "audiochannels", "audiowidth",
-             "mimetype"]
-         if key in metadata])
-
+    stored_metadata = dict()
+    audio_info_list = metadata.get_audio_streams()
+    if audio_info:
+        audio_info = audio_info_list[0]
+        stored_metadata['audiochannels'] = audio_info.get_channels()
+    # video is always there
+    video_info = metadata.get_video_streams()[0]
+    # Let's pull out the easy, not having to be converted ones first
+    stored_metadata['videoheight'] = video_info.get_height()
+    stored_metadata['videowidth'] = video_info.get_width()
+    stored_metadata['videolength'] = metadata.get_duration()
+    stored_metadata['mimetype'] = metadata.get_tags().get_string('mimetype')
     # We have to convert videorate into a sequence because it's a
     # special type normally..
+    stored_metadata['videorate'] = [video_info.get_framerate_num(),
+                                   video_info.get_framerate_denom()]
 
-    if "videorate" in metadata:
-        videorate = metadata["videorate"]
-        stored_metadata["videorate"] = [videorate.num, videorate.denom]
-
-    # Also make a whitelist conversion of the tags.
-    if "tags" in metadata:
-        tags_metadata = metadata['tags']
-
+    if metadata.get_tags():
+        tags_metadata = metadata.get_tags()
         # we don't use *all* of these, but we know these ones are
         # safe...
+        # get_string returns (success, value) tuple
         tags = dict(
-            [(key, tags_metadata[key])
+            [(key, tags_metadata.get_string(key)[1])
              for key in [
                  "application-name", "artist", "audio-codec", "bitrate",
                  "container-format", "copyright", "encoder",
                  "encoder-version", "license", "nominal-bitrate", "title",
                  "video-codec"]
-             if key in tags_metadata])
-        if 'date' in tags_metadata:
-            date = tags_metadata['date']
+             if tags_metadata.get_string(key)[0]])
+        (success, date) = tags_metadata.get_date('date')
+        if success:
             tags['date'] = "%s-%s-%s" % (
                 date.year, date.month, date.day)
 
         # TODO: handle timezone info; gst.get_time_zone_offset +
         #   python's tzinfo should help
-        if 'datetime' in tags_metadata:
-            dt = tags_metadata['datetime']
+        (success, dt) = tags_metadata.get_date_time('datetime')
+        if success:
             tags['datetime'] = datetime.datetime(
                 dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(),
                 dt.get_minute(), dt.get_second(),
                 dt.get_microsecond()).isoformat()
-
         stored_metadata['tags'] = tags
-
     # Only save this field if there's something to save
     if len(stored_metadata):
         media_entry.media_data_init(
@@ -220,7 +211,10 @@ class CommonVideoProcessor(MediaProcessor):
             return
 
         # Extract metadata and keep a record of it
-        metadata = self.transcoder.discover(self.process_filename)
+        metadata = transcoders.discover(self.process_filename)
+        # metadata's stream info here is a DiscovererContainerInfo instance,
+        # it gets split into DiscovererAudioInfo and DiscovererVideoInfo;
+        # metadata itself has container-related data in tags, like video-codec
         store_metadata(self.entry, metadata)
 
         # Figure out whether or not we need to transcode this video or
@@ -243,10 +237,8 @@ class CommonVideoProcessor(MediaProcessor):
                                       vorbis_quality=vorbis_quality,
                                       progress_callback=progress_callback,
                                       dimensions=tuple(medium_size))
-
-            dst_dimensions = self.transcoder.dst_data.videowidth,\
-                self.transcoder.dst_data.videoheight
-
+            video_info = self.transcoder.dst_data.get_video_streams()[0]
+            dst_dimensions = (video_info.get_width(), video_info.get_height())
             self._keep_best()
 
             # Push transcoded video to public storage
diff --git a/mediagoblin/media_types/video/transcoders.py b/mediagoblin/media_types/video/transcoders.py
index e08b897c..d53cabc6 100644
--- a/mediagoblin/media_types/video/transcoders.py
+++ b/mediagoblin/media_types/video/transcoders.py
@@ -19,16 +19,18 @@ from __future__ import division
 import os
 import sys
 import logging
-import urllib
 import multiprocessing
-import gobject
+from mediagoblin.media_types.tools import discover
+
+#os.environ['GST_DEBUG'] = '4,python:4'
 
 old_argv = sys.argv
 sys.argv = []
 
-import pygst
-pygst.require('0.10')
-import gst
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import GObject, Gst, GstPbutils
+Gst.init(None)
 
 sys.argv = old_argv
 import struct
@@ -37,12 +39,8 @@ try:
 except ImportError:
     import Image
 
-from gst.extend import discoverer
-
 _log = logging.getLogger(__name__)
 
-gobject.threads_init()
-
 CPU_COUNT = 2
 
 try:
@@ -53,57 +51,70 @@ except NotImplementedError:
 os.putenv('GST_DEBUG_DUMP_DOT_DIR', '/tmp')
 
 
-def pixbuf_to_pilbuf(buf):
-    data = list()
-    for i in range(0, len(buf)-4, 4):
-        r, g, b, x = struct.unpack('BBBB', buf[i:i + 4])
-        # XXX: can something be done with the 'X' part of RGBX?
-        data.append((r, g, b))
-    return data
-
 def capture_thumb(video_path, dest_path, width=None, height=None, percent=0.5):
     def pad_added(element, pad, connect_to):
-        caps = pad.get_caps()
-        name = caps[0].get_name()
+        '''This is a callback to dynamically add element to pipeline'''
+        caps = pad.query_caps(None)
+        name = caps.to_string()
         _log.debug('on_pad_added: {0}'.format(name))
         if name.startswith('video') and not connect_to.is_linked():
             pad.link(connect_to)
-    # construct pipeline: uridecodebin ! ffmpegcolorspace ! videoscale ! \
+
+    # construct pipeline: uridecodebin ! videoconvert ! videoscale ! \
     # ! CAPS ! appsink
-    pipeline = gst.Pipeline()
-    uridecodebin = gst.element_factory_make('uridecodebin')
+    pipeline = Gst.Pipeline()
+    uridecodebin = Gst.ElementFactory.make('uridecodebin', None)
     uridecodebin.set_property('uri', 'file://{0}'.format(video_path))
-    ffmpegcolorspace = gst.element_factory_make('ffmpegcolorspace')
+    videoconvert = Gst.ElementFactory.make('videoconvert', None)
     uridecodebin.connect('pad-added', pad_added,
-                         ffmpegcolorspace.get_pad('sink'))
-    videoscale = gst.element_factory_make('videoscale')
-    filter = gst.element_factory_make('capsfilter', 'filter')
+                         videoconvert.get_static_pad('sink'))
+    videoscale = Gst.ElementFactory.make('videoscale', None)
+
     # create caps for video scaling
-    caps_struct = gst.Structure('video/x-raw-rgb')
-    caps_struct.set_value('pixel-aspect-ratio', gst.Fraction(1, 1))
+    caps_struct = Gst.Structure.new_empty('video/x-raw')
+    caps_struct.set_value('pixel-aspect-ratio', Gst.Fraction(1, 1))
+    caps_struct.set_value('format', 'RGB')
     if height:
         caps_struct.set_value('height', height)
     if width:
         caps_struct.set_value('width', width)
-    caps = gst.Caps(caps_struct)
-    filter.set_property('caps', caps)
-    appsink = gst.element_factory_make('appsink')
-    pipeline.add(uridecodebin, ffmpegcolorspace, videoscale, filter, appsink)
-    gst.element_link_many(ffmpegcolorspace, videoscale, filter, appsink)
+    caps = Gst.Caps.new_empty()
+    caps.append_structure(caps_struct)
+
+    # sink everything to memory
+    appsink = Gst.ElementFactory.make('appsink', None)
+    appsink.set_property('caps', caps)
+
+    # add everything to pipeline
+    elements = [uridecodebin, videoconvert, videoscale, appsink]
+    for e in elements:
+        pipeline.add(e)
+    videoconvert.link(videoscale)
+    videoscale.link(appsink)
+
     # pipeline constructed, starting playing, but first some preparations
-    if pipeline.set_state(gst.STATE_PAUSED) == gst.STATE_CHANGE_FAILURE:
-        _log.warning('state change failed')
-    pipeline.get_state()
-    duration = pipeline.query_duration(gst.FORMAT_TIME, None)[0]
-    if duration == gst.CLOCK_TIME_NONE:
+    # seek to 50% of the file is required
+    pipeline.set_state(Gst.State.PAUSED)
+    # timeout of 3 seconds below was set experimentally
+    state = pipeline.get_state(Gst.SECOND * 3)
+    if state[0] != Gst.StateChangeReturn.SUCCESS:
+        _log.warning('state change failed, {0}'.format(state))
+        return
+
+    # get duration
+    (success, duration) = pipeline.query_duration(Gst.Format.TIME)
+    if not success:
         _log.warning('query_duration failed')
-        duration = 0  # XXX
+        return
+
     seek_to = int(duration * int(percent * 100) / 100)
     _log.debug('Seeking to {0} of {1}'.format(
-            seek_to / gst.SECOND, duration / gst.SECOND))
-    seek = pipeline.seek_simple(gst.FORMAT_TIME, gst.SEEK_FLAG_FLUSH, seek_to)
+            float(seek_to) / Gst.SECOND, float(duration) / Gst.SECOND))
+    seek = pipeline.seek_simple(Gst.Format.TIME, Gst.SeekFlags.FLUSH, seek_to)
     if not seek:
         _log.warning('seek failed')
+        return
+
     # get sample, retrieve it's format and save
     sample = appsink.emit("pull-preroll")
     if not sample:
@@ -112,16 +123,20 @@ def capture_thumb(video_path, dest_path, width=None, height=None, percent=0.5):
     caps = sample.get_caps()
     if not caps:
         _log.warning('could not get snapshot format')
+        return
     structure = caps.get_structure(0)
     (success, width) = structure.get_int('width')
     (success, height) = structure.get_int('height')
     buffer = sample.get_buffer()
+
+    # get the image from the buffer and save it to disk
     im = Image.frombytes('RGB', (width, height),
                          buffer.extract_dup(0, buffer.get_size()))
     im.save(dest_path)
     _log.info('thumbnail saved to {0}'.format(dest_path))
+
     # cleanup
-    pipeline.set_state(gst.STATE_NULL)
+    pipeline.set_state(Gst.State.NULL)
 
 
 class VideoTranscoder(object):
@@ -130,16 +145,12 @@ class VideoTranscoder(object):
 
     Transcodes the SRC video file to a VP8 WebM video file at DST
 
-     - Does the same thing as VideoThumbnailer, but produces a WebM vp8
-       and vorbis video file.
-     - The VideoTranscoder exceeds the VideoThumbnailer in the way
-       that it was refined afterwards and therefore is done more
-       correctly.
+     - Produces a WebM vp8 and vorbis video file.
     '''
     def __init__(self):
         _log.info('Initializing VideoTranscoder...')
         self.progress_percentage = None
-        self.loop = gobject.MainLoop()
+        self.loop = GObject.MainLoop()
 
     def transcode(self, src, dst, **kwargs):
         '''
@@ -172,152 +183,85 @@ class VideoTranscoder(object):
         if not type(self.destination_dimensions) == tuple:
             raise Exception('dimensions must be tuple: (width, height)')
 
-        self._setup()
-        self._run()
-
-    # XXX: This could be a static method.
-    def discover(self, src):
-        '''
-        Discover properties about a media file
-        '''
-        _log.info('Discovering {0}'.format(src))
-
-        self.source_path = src
-        self._setup_discover(discovered_callback=self.__on_discovered)
-
-        self.discoverer.discover()
-
-        self.loop.run()
-        if hasattr(self, '_discovered_data'):
-            return self._discovered_data.__dict__
-        else:
-            return None
-
-    def __on_discovered(self, data, is_media):
-        _log.debug('Discovered: {0}'.format(data))
-        if not is_media:
-            self.__stop()
-            raise Exception('Could not discover {0}'.format(self.source_path))
-
-        self._discovered_data = data
-
-        self.__stop_mainloop()
-
-    def _setup(self):
-        self._setup_discover()
         self._setup_pipeline()
-
-    def _run(self):
-        _log.info('Discovering...')
-        self.discoverer.discover()
-        _log.info('Done')
-
+        self.data = discover(self.source_path)
+        self._link_elements()
+        self.__setup_videoscale_capsfilter()
+        self.pipeline.set_state(Gst.State.PLAYING)
+        _log.info('Transcoding...')
         _log.debug('Initializing MainLoop()')
         self.loop.run()
 
-    def _setup_discover(self, **kw):
-        _log.debug('Setting up discoverer')
-        self.discoverer = discoverer.Discoverer(self.source_path)
-
-        # Connect self.__discovered to the 'discovered' event
-        self.discoverer.connect(
-            'discovered',
-            kw.get('discovered_callback', self.__discovered))
-
-    def __discovered(self, data, is_media):
-        '''
-        Callback for media discoverer.
-        '''
-        if not is_media:
-            self.__stop()
-            raise Exception('Could not discover {0}'.format(self.source_path))
-
-        _log.debug('__discovered, data: {0}'.format(data.__dict__))
-
-        self.data = data
-
-        # Launch things that should be done after discovery
-        self._link_elements()
-        self.__setup_videoscale_capsfilter()
-
-        # Tell the transcoding pipeline to start running
-        self.pipeline.set_state(gst.STATE_PLAYING)
-        _log.info('Transcoding...')
 
     def _setup_pipeline(self):
         _log.debug('Setting up transcoding pipeline')
         # Create the pipeline bin.
-        self.pipeline = gst.Pipeline('VideoTranscoderPipeline')
+        self.pipeline = Gst.Pipeline.new('VideoTranscoderPipeline')
 
         # Create all GStreamer elements, starting with
         # filesrc & decoder
-        self.filesrc = gst.element_factory_make('filesrc', 'filesrc')
+        self.filesrc = Gst.ElementFactory.make('filesrc', 'filesrc')
         self.filesrc.set_property('location', self.source_path)
         self.pipeline.add(self.filesrc)
 
-        self.decoder = gst.element_factory_make('decodebin2', 'decoder')
-        self.decoder.connect('new-decoded-pad', self._on_dynamic_pad)
+        self.decoder = Gst.ElementFactory.make('decodebin', 'decoder')
+        self.decoder.connect('pad-added', self._on_dynamic_pad)
         self.pipeline.add(self.decoder)
 
         # Video elements
-        self.videoqueue = gst.element_factory_make('queue', 'videoqueue')
+        self.videoqueue = Gst.ElementFactory.make('queue', 'videoqueue')
         self.pipeline.add(self.videoqueue)
 
-        self.videorate = gst.element_factory_make('videorate', 'videorate')
+        self.videorate = Gst.ElementFactory.make('videorate', 'videorate')
         self.pipeline.add(self.videorate)
 
-        self.ffmpegcolorspace = gst.element_factory_make(
-            'ffmpegcolorspace', 'ffmpegcolorspace')
-        self.pipeline.add(self.ffmpegcolorspace)
+        self.videoconvert = Gst.ElementFactory.make('videoconvert',
+                                                    'videoconvert')
+        self.pipeline.add(self.videoconvert)
 
-        self.videoscale = gst.element_factory_make('ffvideoscale', 'videoscale')
-        #self.videoscale.set_property('method', 2)  # I'm not sure this works
-        #self.videoscale.set_property('add-borders', 0)
+        self.videoscale = Gst.ElementFactory.make('videoscale', 'videoscale')
         self.pipeline.add(self.videoscale)
 
-        self.capsfilter = gst.element_factory_make('capsfilter', 'capsfilter')
+        self.capsfilter = Gst.ElementFactory.make('capsfilter', 'capsfilter')
         self.pipeline.add(self.capsfilter)
 
-        self.vp8enc = gst.element_factory_make('vp8enc', 'vp8enc')
-        self.vp8enc.set_property('quality', self.vp8_quality)
+        self.vp8enc = Gst.ElementFactory.make('vp8enc', 'vp8enc')
         self.vp8enc.set_property('threads', self.vp8_threads)
-        self.vp8enc.set_property('max-latency', 25)
         self.pipeline.add(self.vp8enc)
 
         # Audio elements
-        self.audioqueue = gst.element_factory_make('queue', 'audioqueue')
+        self.audioqueue = Gst.ElementFactory.make('queue', 'audioqueue')
         self.pipeline.add(self.audioqueue)
 
-        self.audiorate = gst.element_factory_make('audiorate', 'audiorate')
+        self.audiorate = Gst.ElementFactory.make('audiorate', 'audiorate')
         self.audiorate.set_property('tolerance', 80000000)
         self.pipeline.add(self.audiorate)
 
-        self.audioconvert = gst.element_factory_make('audioconvert', 'audioconvert')
+        self.audioconvert = Gst.ElementFactory.make('audioconvert', 'audioconvert')
         self.pipeline.add(self.audioconvert)
 
-        self.audiocapsfilter = gst.element_factory_make('capsfilter',
-                                                        'audiocapsfilter')
-        audiocaps = ['audio/x-raw-float']
-        self.audiocapsfilter.set_property(
-            'caps',
-            gst.caps_from_string(
-                ','.join(audiocaps)))
+        self.audiocapsfilter = Gst.ElementFactory.make('capsfilter',
+                                                       'audiocapsfilter')
+        audiocaps = Gst.Caps.new_empty()
+        audiocaps_struct = Gst.Structure.new_empty('audio/x-raw')
+        audiocaps.append_structure(audiocaps_struct)
+        self.audiocapsfilter.set_property('caps', audiocaps)
         self.pipeline.add(self.audiocapsfilter)
 
-        self.vorbisenc = gst.element_factory_make('vorbisenc', 'vorbisenc')
+        self.vorbisenc = Gst.ElementFactory.make('vorbisenc', 'vorbisenc')
         self.vorbisenc.set_property('quality', self.vorbis_quality)
         self.pipeline.add(self.vorbisenc)
 
         # WebMmux & filesink
-        self.webmmux = gst.element_factory_make('webmmux', 'webmmux')
+        self.webmmux = Gst.ElementFactory.make('webmmux', 'webmmux')
         self.pipeline.add(self.webmmux)
 
-        self.filesink = gst.element_factory_make('filesink', 'filesink')
+        self.filesink = Gst.ElementFactory.make('filesink', 'filesink')
         self.filesink.set_property('location', self.destination_path)
         self.pipeline.add(self.filesink)
 
         # Progressreport
-        self.progressreport = gst.element_factory_make(
+        self.progressreport = Gst.ElementFactory.make(
             'progressreport', 'progressreport')
         # Update every second
         self.progressreport.set_property('update-freq', 1)
@@ -336,48 +280,41 @@ class VideoTranscoder(object):
         # 'new-decoded-pad' which links decoded src pads to either a video
         # or audio sink
         self.filesrc.link(self.decoder)
-
-        # Link all the video elements in a row to webmmux
-        gst.element_link_many(
-            self.videoqueue,
-            self.videorate,
-            self.ffmpegcolorspace,
-            self.videoscale,
-            self.capsfilter,
-            self.vp8enc,
-            self.webmmux)
+        # link the rest
+        self.videoqueue.link(self.videorate)
+        self.videorate.link(self.videoconvert)
+        self.videoconvert.link(self.videoscale)
+        self.videoscale.link(self.capsfilter)
+        self.capsfilter.link(self.vp8enc)
+        self.vp8enc.link(self.webmmux)
 
         if self.data.is_audio:
-            # Link all the audio elements in a row to webmux
-            gst.element_link_many(
-                self.audioqueue,
-                self.audiorate,
-                self.audioconvert,
-                self.audiocapsfilter,
-                self.vorbisenc,
-                self.webmmux)
-
-        gst.element_link_many(
-            self.webmmux,
-            self.progressreport,
-            self.filesink)
+            # Link all the audio elements in a row to webmmux
+            self.audioqueue.link(self.audiorate)
+            self.audiorate.link(self.audioconvert)
+            self.audioconvert.link(self.audiocapsfilter)
+            self.audiocapsfilter.link(self.vorbisenc)
+            self.vorbisenc.link(self.webmmux)
+        self.webmmux.link(self.progressreport)
+        self.progressreport.link(self.filesink)
 
         # Setup the message bus and connect _on_message to the pipeline
         self._setup_bus()
 
-    def _on_dynamic_pad(self, dbin, pad, islast):
+    def _on_dynamic_pad(self, dbin, pad):
         '''
-        Callback called when ``decodebin2`` has a pad that we can connect to
+        Callback called when ``decodebin`` has a pad that we can connect to
         '''
         # Intersect the capabilities of the video sink and the pad src
         # Then check if they have no common capabilities.
-        if self.ffmpegcolorspace.get_pad_template('sink')\
-                .get_caps().intersect(pad.get_caps()).is_empty():
+        if (self.videorate.get_static_pad('sink').get_pad_template()
+                .get_caps().intersect(pad.query_caps()).is_empty()):
             # It is NOT a video src pad.
-            pad.link(self.audioqueue.get_pad('sink'))
+            pad.link(self.audioqueue.get_static_pad('sink'))
         else:
             # It IS a video src pad.
-            pad.link(self.videoqueue.get_pad('sink'))
+            _log.debug('linking video to the pad dynamically')
+            pad.link(self.videoqueue.get_static_pad('sink'))
 
     def _setup_bus(self):
         self.bus = self.pipeline.get_bus()
@@ -388,73 +325,53 @@ class VideoTranscoder(object):
         '''
         Sets up the output format (width, height) for the video
         '''
-        caps = ['video/x-raw-yuv', 'pixel-aspect-ratio=1/1', 'framerate=30/1']
-
-        if self.data.videoheight > self.data.videowidth:
-            # Whoa! We have ourselves a portrait video!
-            caps.append('height={0}'.format(
-                    self.destination_dimensions[1]))
+        caps_struct = Gst.Structure.new_empty('video/x-raw')
+        caps_struct.set_value('pixel-aspect-ratio', Gst.Fraction(1, 1))
+        caps_struct.set_value('framerate', Gst.Fraction(30, 1))
+        video_info = self.data.get_video_streams()[0]
+        if video_info.get_height() > video_info.get_width():
+            # portrait
+            caps_struct.set_value('height', self.destination_dimensions[1])
         else:
-            # It's a landscape, phew, how normal.
-            caps.append('width={0}'.format(
-                    self.destination_dimensions[0]))
-
-        self.capsfilter.set_property(
-            'caps',
-            gst.caps_from_string(
-                ','.join(caps)))
+            # landscape
+            caps_struct.set_value('width', self.destination_dimensions[0])
+        caps = Gst.Caps.new_empty()
+        caps.append_structure(caps_struct)
+        self.capsfilter.set_property('caps', caps)
 
     def _on_message(self, bus, message):
         _log.debug((bus, message, message.type))
-
-        t = message.type
-
-        if message.type == gst.MESSAGE_EOS:
-            self._discover_dst_and_stop()
-            _log.info('Done')
-
-        elif message.type == gst.MESSAGE_ELEMENT:
-            if message.structure.get_name() == 'progress':
-                data = dict(message.structure)
-                # Update progress state if it has changed
-                if self.progress_percentage != data.get('percent'):
-                    self.progress_percentage = data.get('percent')
-                    if self._progress_callback:
-                        self._progress_callback(data.get('percent'))
-
-                    _log.info('{percent}% done...'.format(
-                            percent=data.get('percent')))
-                _log.debug(data)
-
-        elif t == gst.MESSAGE_ERROR:
-            _log.error((bus, message))
+        if message.type == Gst.MessageType.EOS:
+            self.dst_data = discover(self.destination_path)
+            self.__stop()
+            _log.info('Done')
+        elif message.type == Gst.MessageType.ELEMENT:
+            if message.has_name('progress'):
+                structure = message.get_structure()
+                # Update progress state if it has changed
+                (success, percent) = structure.get_int('percent')
+                if self.progress_percentage != percent and success:
+                    self.progress_percentage = percent
+                    if self._progress_callback:
+                        self._progress_callback(percent)
+                    _log.info('{percent}% done...'.format(percent=percent))
+        elif message.type == Gst.MessageType.ERROR:
+            _log.error('Got error: {0}'.format(message.parse_error()))
             self.__stop()
-
-    def _discover_dst_and_stop(self):
-        self.dst_discoverer = discoverer.Discoverer(self.destination_path)
-
-        self.dst_discoverer.connect('discovered', self.__dst_discovered)
-
-        self.dst_discoverer.discover()
-
-    def __dst_discovered(self, data, is_media):
-        self.dst_data = data
-
-        self.__stop()
 
     def __stop(self):
         _log.debug(self.loop)
 
         if hasattr(self, 'pipeline'):
             # Stop executing the pipeline
-            self.pipeline.set_state(gst.STATE_NULL)
+            self.pipeline.set_state(Gst.State.NULL)
 
         # This kills the loop, mercifully
-        gobject.idle_add(self.__stop_mainloop)
+        GObject.idle_add(self.__stop_mainloop)
 
     def __stop_mainloop(self):
         '''
-        Wrapper for gobject.MainLoop.quit()
+        Wrapper for GObject.MainLoop.quit()
 
         This wrapper makes us able to see if self.loop.quit has been called
         '''
diff --git a/mediagoblin/media_types/video/util.py b/mediagoblin/media_types/video/util.py
index 29b7f410..4dc395b4 100644
--- a/mediagoblin/media_types/video/util.py
+++ b/mediagoblin/media_types/video/util.py
@@ -33,27 +33,33 @@ def skip_transcode(metadata, size):
     medium_config = mgg.global_config['media:medium']
 
     _log.debug('skip_transcode config: {0}'.format(config))
-
-    if config['mime_types'] and metadata.get('mimetype'):
-        if not metadata['mimetype'] in config['mime_types']:
+    tags = metadata.get_tags()
+    if config['mime_types'] and tags.get_string('mimetype'):
+        if not tags.get_string('mimetype') in config['mime_types']:
             return False
 
-    if config['container_formats'] and metadata['tags'].get('container-format'):
-        if not metadata['tags']['container-format'] in config['container_formats']:
+    if config['container_formats'] and tags.get_string('container-format'):
+        if not (metadata.get_tags().get_string('container-format') in
+                config['container_formats']):
             return False
 
-    if config['video_codecs'] and metadata['tags'].get('video-codec'):
-        if not metadata['tags']['video-codec'] in config['video_codecs']:
+    if (config['video_codecs'] and
+            metadata.get_tags().get_string('video-codec')):
+        if not (metadata.get_tags().get_string('video-codec') in
+                config['video_codecs']):
             return False
 
-    if config['audio_codecs'] and metadata['tags'].get('audio-codec'):
-        if not metadata['tags']['audio-codec'] in config['audio_codecs']:
+    if (config['audio_codecs'] and
+            metadata.get_tags().get_string('audio-codec')):
+        if not (metadata.get_tags().get_string('audio-codec') in
+                config['audio_codecs']):
             return False
 
+    video_info = metadata.get_video_streams()[0]
     if config['dimensions_match']:
-        if not metadata['videoheight'] <= size[1]:
+        if not video_info.get_height() <= size[1]:
             return False
-        if not metadata['videowidth'] <= size[0]:
+        if not video_info.get_width() <= size[0]:
             return False
 
     return True
diff --git a/mediagoblin/processing/__init__.py b/mediagoblin/processing/__init__.py
index 5a88ddea..b7e36027 100644
--- a/mediagoblin/processing/__init__.py
+++ b/mediagoblin/processing/__init__.py
@@ -378,12 +378,11 @@ def store_public(entry, keyname, local_file, target_name=None,
                   entry.media_files[keyname], target_filepath)
         if delete_if_exists:
             mgg.public_store.delete_file(entry.media_files[keyname])
-
     try:
         mgg.public_store.copy_local_to_storage(local_file, target_filepath)
-    except:
+    except Exception as e:
+        _log.error(u'Exception happened: {0}'.format(e))
         raise PublicStoreFail(keyname=keyname)
-
     # raise an error if the file failed to copy
     if not mgg.public_store.file_exists(target_filepath):
         raise PublicStoreFail(keyname=keyname)
diff --git a/mediagoblin/tests/test_video.py b/mediagoblin/tests/test_video.py
index 0fe58f60..03298b67 100644
--- a/mediagoblin/tests/test_video.py
+++ b/mediagoblin/tests/test_video.py
@@ -15,57 +15,117 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import tempfile
-import shutil
 import os
-import pytest
 from contextlib import contextmanager
-import logging
 import imghdr
 
-#TODO: this should be skipped if video plugin is not enabled
-import pygst
-pygst.require('0.10')
-import gst
+#os.environ['GST_DEBUG'] = '4,python:4'
 
-from mediagoblin.media_types.video.transcoders import capture_thumb
+#TODO: this should be skipped if video plugin is not enabled
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import Gst
+Gst.init(None)
+
+from mediagoblin.media_types.video.transcoders import (capture_thumb,
+        VideoTranscoder)
+from mediagoblin.media_types.tools import discover
 
 @contextmanager
-def create_data(suffix):
+def create_data(suffix=None, make_audio=False):
     video = tempfile.NamedTemporaryFile()
-    src = gst.element_factory_make('videotestsrc')
-    src.set_property('num-buffers', 50)
-    enc = gst.element_factory_make('theoraenc')
-    mux = gst.element_factory_make('oggmux')
-    dst = gst.element_factory_make('filesink')
+    src = Gst.ElementFactory.make('videotestsrc', None)
+    src.set_property('num-buffers', 10)
+    videorate = Gst.ElementFactory.make('videorate', None)
+    enc = Gst.ElementFactory.make('theoraenc', None)
+    mux = Gst.ElementFactory.make('oggmux', None)
+    dst = Gst.ElementFactory.make('filesink', None)
     dst.set_property('location', video.name)
-    pipeline = gst.Pipeline()
-    pipeline.add(src, enc, mux, dst)
-    gst.element_link_many(src, enc, mux, dst)
-    pipeline.set_state(gst.STATE_PLAYING)
-    # wait for finish
+    pipeline = Gst.Pipeline()
+    pipeline.add(src)
+    pipeline.add(videorate)
+    pipeline.add(enc)
+    pipeline.add(mux)
+    pipeline.add(dst)
+    src.link(videorate)
+    videorate.link(enc)
+    enc.link(mux)
+    mux.link(dst)
+    if make_audio:
+        audio_src = Gst.ElementFactory.make('audiotestsrc', None)
+        audio_src.set_property('num-buffers', 10)
+        audiorate = Gst.ElementFactory.make('audiorate', None)
+        audio_enc = Gst.ElementFactory.make('vorbisenc', None)
+        pipeline.add(audio_src)
+        pipeline.add(audio_enc)
+        pipeline.add(audiorate)
+        audio_src.link(audiorate)
+        audiorate.link(audio_enc)
+        audio_enc.link(mux)
+    pipeline.set_state(Gst.State.PLAYING)
+    state = pipeline.get_state(3 * Gst.SECOND)
+    assert state[0] == Gst.StateChangeReturn.SUCCESS
     bus = pipeline.get_bus()
-    message = bus.timed_pop_filtered(gst.CLOCK_TIME_NONE,
-                                     gst.MESSAGE_ERROR | gst.MESSAGE_EOS)
-    thumb = tempfile.NamedTemporaryFile(suffix=suffix)
-    pipeline.set_state(gst.STATE_NULL)
-    yield (video.name, thumb.name)
+    message = bus.timed_pop_filtered(
+            3 * Gst.SECOND,
+            Gst.MessageType.ERROR | Gst.MessageType.EOS)
+    pipeline.set_state(Gst.State.NULL)
+    if suffix:
+        result = tempfile.NamedTemporaryFile(suffix=suffix)
+    else:
+        result = tempfile.NamedTemporaryFile()
+    yield (video.name, result.name)
 
 
 #TODO: this should be skipped if video plugin is not enabled
 def test_thumbnails():
     '''
     Test thumbnails generation.
-    1. Create a video from gst's videotestsrc
-    3. Capture thumbnail
-    4. Remove it
+    1. Create a video (+audio) from gst's videotestsrc
+    2. Capture thumbnail
+    3. Everything should get removed because of temp files usage
     '''
     #data  create_data() as (video_name, thumbnail_name):
     test_formats = [('.png', 'png'), ('.jpg', 'jpeg'), ('.gif', 'gif')]
     for suffix, format in test_formats:
         with create_data(suffix) as (video_name, thumbnail_name):
             capture_thumb(video_name, thumbnail_name, width=40)
-            # check if png
+            # check result file format
             assert imghdr.what(thumbnail_name) == format
             # TODO: check height and width
             # FIXME: it doesn't work with small width, say, 10px. This should be
             # fixed somehow
+    suffix, format = test_formats[0]
+    with create_data(suffix, True) as (video_name, thumbnail_name):
+        capture_thumb(video_name, thumbnail_name, width=40)
+        assert imghdr.what(thumbnail_name) == format
+    with create_data(suffix, True) as (video_name, thumbnail_name):
+        capture_thumb(video_name, thumbnail_name, width=10)  # smaller width
+        assert imghdr.what(thumbnail_name) == format
+    with create_data(suffix, True) as (video_name, thumbnail_name):
+        capture_thumb(video_name, thumbnail_name, width=100)  # bigger width
+        assert imghdr.what(thumbnail_name) == format
+
+
+def test_transcoder():
+    # test without audio
+    with create_data() as (video_name, result_name):
+        transcoder = VideoTranscoder()
+        transcoder.transcode(
+                video_name, result_name,
+                vp8_quality=8,
+                vp8_threads=0,  # autodetect
+                vorbis_quality=0.3,
+                dimensions=(640, 640))
+        assert len(discover(result_name).get_video_streams()) == 1
+    # test with audio
+    with create_data(make_audio=True) as (video_name, result_name):
+        transcoder = VideoTranscoder()
+        transcoder.transcode(
+                video_name, result_name,
+                vp8_quality=8,
+                vp8_threads=0,  # autodetect
+                vorbis_quality=0.3,
+                dimensions=(640, 640))
+        assert len(discover(result_name).get_video_streams()) == 1
+        assert len(discover(result_name).get_audio_streams()) == 1

From 57d8212a796e4952955c047efa61616aad006040 Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Fri, 13 Jun 2014 10:02:10 +0400
Subject: [PATCH 03/13] Port of audio to GStreamer 1.0

Includes:
 - transcoders
 - thumbs
 - tests
---
 mediagoblin/media_types/audio/processing.py  |  25 +--
 mediagoblin/media_types/audio/transcoders.py | 164 ++++++++-----------
 mediagoblin/media_types/video/transcoders.py |   5 +-
 mediagoblin/tests/test_audio.py              | 104 ++++++++++++
 4 files changed, 181 insertions(+), 117 deletions(-)
 create mode 100644 mediagoblin/tests/test_audio.py

diff --git a/mediagoblin/media_types/audio/processing.py b/mediagoblin/media_types/audio/processing.py
index de6fa9ca..770342ff 100644
--- a/mediagoblin/media_types/audio/processing.py
+++ b/mediagoblin/media_types/audio/processing.py
@@ -27,6 +27,7 @@ from mediagoblin.processing import (
 
 from mediagoblin.media_types.audio.transcoders import (
     AudioTranscoder, AudioThumbnailer)
+from mediagoblin.media_types.tools import discover
 
 _log = logging.getLogger(__name__)
 
@@ -35,16 +36,9 @@ MEDIA_TYPE = 'mediagoblin.media_types.audio'
 
 def sniff_handler(media_file, filename):
     _log.info('Sniffing {0}'.format(MEDIA_TYPE))
-    try:
-        transcoder = AudioTranscoder()
-        data = transcoder.discover(media_file.name)
-    except BadMediaFail:
-        _log.debug('Audio discovery raised BadMediaFail')
-        return None
-
-    if data.is_audio is True and data.is_video is False:
+    data = discover(media_file.name)
+    if data and data.get_audio_streams() and not data.get_video_streams():
         return MEDIA_TYPE
-
     return None
 
 
@@ -126,8 +120,6 @@ class CommonAudioProcessor(MediaProcessor):
             quality=quality,
             progress_callback=progress_callback)
 
-        self.transcoder.discover(webm_audio_tmp)
-
         self._keep_best()
 
         _log.debug('Saving medium...')
@@ -145,21 +137,14 @@ class CommonAudioProcessor(MediaProcessor):
         if self._skip_processing('spectrogram', max_width=max_width,
                                  fft_size=fft_size):
             return
-
         wav_tmp = os.path.join(self.workbench.dir, self.name_builder.fill(
             '{basename}.ogg'))
-
         _log.info('Creating OGG source for spectrogram')
-        self.transcoder.transcode(
-            self.process_filename,
-            wav_tmp,
-            mux_string='vorbisenc quality={0} ! oggmux'.format(
-                self.audio_config['quality']))
-
+        self.transcoder.transcode(self.process_filename, wav_tmp,
+                                  mux_name='oggmux')
         spectrogram_tmp = os.path.join(self.workbench.dir,
                                        self.name_builder.fill(
                                            '{basename}-spectrogram.jpg'))
-
         self.thumbnailer.spectrogram(
             wav_tmp,
             spectrogram_tmp,
diff --git a/mediagoblin/media_types/audio/transcoders.py b/mediagoblin/media_types/audio/transcoders.py
index 150dad8e..f86528de 100644
--- a/mediagoblin/media_types/audio/transcoders.py
+++ b/mediagoblin/media_types/audio/transcoders.py
@@ -20,10 +20,8 @@ try:
 except ImportError:
     import Image
 
-from mediagoblin.processing import BadMediaFail
 from mediagoblin.media_types.audio import audioprocessing
 
-
 _log = logging.getLogger(__name__)
 
 CPU_COUNT = 2  # Just assuming for now
@@ -39,26 +37,13 @@ try:
 except ImportError:
     _log.warning('Could not import multiprocessing, assuming 2 CPU cores')
 
-# IMPORT GOBJECT
-try:
-    import gobject
-    gobject.threads_init()
-except ImportError:
-    raise Exception('gobject could not be found')
+# uncomment this to get a lot of logs from gst
+# import os;os.environ['GST_DEBUG'] = '5,python:5'
 
-# IMPORT PYGST
-try:
-    import pygst
-
-    # We won't settle for less. For now, this is an arbitrary limit
-    # as we have not tested with > 0.10
-    pygst.require('0.10')
-
-    import gst
-
-    import gst.extend.discoverer
-except ImportError:
-    raise Exception('gst/pygst >= 0.10 could not be imported')
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import GObject, Gst
+Gst.init(None)
 
 import numpy
 
@@ -72,7 +57,6 @@ class AudioThumbnailer(object):
         height = int(kw.get('height', float(width) * 0.3))
         fft_size = kw.get('fft_size', 2048)
         callback = kw.get('progress_callback')
-
         processor = audioprocessing.AudioProcessor(
             src,
             fft_size,
@@ -132,95 +116,87 @@ class AudioTranscoder(object):
         _log.info('Initializing {0}'.format(self.__class__.__name__))
 
         # Instantiate MainLoop
-        self._loop = gobject.MainLoop()
+        self._loop = GObject.MainLoop()
         self._failed = None
 
-    def discover(self, src):
-        self._src_path = src
-        _log.info('Discovering {0}'.format(src))
-        self._discovery_path = src
-
-        self._discoverer = gst.extend.discoverer.Discoverer(
-            self._discovery_path)
-        self._discoverer.connect('discovered', self.__on_discovered)
-        self._discoverer.discover()
-
-        self._loop.run()  # Run MainLoop
-
-        if self._failed:
-            raise self._failed
-
-        # Once MainLoop has returned, return discovery data
-        return getattr(self, '_discovery_data', False)
-
-    def __on_discovered(self, data, is_media):
-        if not is_media:
-            self._failed = BadMediaFail()
-            _log.error('Could not discover {0}'.format(self._src_path))
-            self.halt()
-
-        _log.debug('Discovered: {0}'.format(data.__dict__))
-
-        self._discovery_data = data
-
-        # Gracefully shut down MainLoop
-        self.halt()
-
-    def transcode(self, src, dst, **kw):
+    def transcode(self, src, dst, mux_name='webmmux',quality=0.3,
+                  progress_callback=None, **kw):
+        def _on_pad_added(element, pad, connect_to):
+            caps = pad.query_caps(None)
+            name = caps.to_string()
+            _log.debug('on_pad_added: {0}'.format(name))
+            if name.startswith('audio') and not connect_to.is_linked():
+                pad.link(connect_to)
         _log.info('Transcoding {0} into {1}'.format(src, dst))
-        self._discovery_data = kw.get('data', self.discover(src))
-
-        self.__on_progress = kw.get('progress_callback')
-
-        quality = kw.get('quality', 0.3)
-
-        mux_string = kw.get(
-            'mux_string',
-            'vorbisenc quality={0} ! webmmux'.format(quality))
-
+        self.__on_progress = progress_callback
         # Set up pipeline
-        self.pipeline = gst.parse_launch(
-            'filesrc location="{src}" ! '
-            'decodebin2 ! queue ! audiorate tolerance={tolerance} ! '
-            'audioconvert ! audio/x-raw-float,channels=2 ! '
-            '{mux_string} ! '
-            'progressreport silent=true ! '
-            'filesink location="{dst}"'.format(
-                src=src,
-                tolerance=80000000,
-                mux_string=mux_string,
-                dst=dst))
-
+        tolerance = 80000000
+        self.pipeline = Gst.Pipeline()
+        filesrc = Gst.ElementFactory.make('filesrc', 'filesrc')
+        filesrc.set_property('location', src)
+        decodebin = Gst.ElementFactory.make('decodebin', 'decodebin')
+        queue = Gst.ElementFactory.make('queue', 'queue')
+        decodebin.connect('pad-added', _on_pad_added,
+                          queue.get_static_pad('sink'))
+        audiorate = Gst.ElementFactory.make('audiorate', 'audiorate')
+        audiorate.set_property('tolerance', tolerance)
+        audioconvert = Gst.ElementFactory.make('audioconvert', 'audioconvert')
+        caps_struct = Gst.Structure.new_empty('audio/x-raw')
+        caps_struct.set_value('channels', 2)
+        caps = Gst.Caps.new_empty()
+        caps.append_structure(caps_struct)
+        capsfilter = Gst.ElementFactory.make('capsfilter', 'capsfilter')
+        capsfilter.set_property('caps', caps)
+        enc = Gst.ElementFactory.make('vorbisenc', 'enc')
+        enc.set_property('quality', quality)
+        mux = Gst.ElementFactory.make(mux_name, 'mux')
+        progressreport = Gst.ElementFactory.make('progressreport', 'progress')
+        progressreport.set_property('silent', True)
+        sink = Gst.ElementFactory.make('filesink', 'sink')
+        sink.set_property('location', dst)
+        # add to pipeline
+        for e in [filesrc, decodebin, queue, audiorate, audioconvert,
+                  capsfilter, enc, mux, progressreport, sink]:
+            self.pipeline.add(e)
+        # link elements
+        filesrc.link(decodebin)
+        decodebin.link(queue)
+        queue.link(audiorate)
+        audiorate.link(audioconvert)
+        audioconvert.link(capsfilter)
+        capsfilter.link(enc)
+        enc.link(mux)
+        mux.link(progressreport)
+        progressreport.link(sink)
         self.bus = self.pipeline.get_bus()
         self.bus.add_signal_watch()
         self.bus.connect('message', self.__on_bus_message)
-
-        self.pipeline.set_state(gst.STATE_PLAYING)
-
+        # run
+        self.pipeline.set_state(Gst.State.PLAYING)
         self._loop.run()
 
     def __on_bus_message(self, bus, message):
-        _log.debug(message)
-
-        if (message.type == gst.MESSAGE_ELEMENT
-            and message.structure.get_name() == 'progress'):
-            data = dict(message.structure)
-
-            if self.__on_progress:
-                self.__on_progress(data.get('percent'))
-
-            _log.info('{0}% done...'.format(
-                    data.get('percent')))
-        elif message.type == gst.MESSAGE_EOS:
+        _log.debug(message.type)
+        if (message.type == Gst.MessageType.ELEMENT
+                and message.has_name('progress')):
+            structure = message.get_structure()
+            (success, percent) = structure.get_int('percent')
+            if self.__on_progress and success:
+                self.__on_progress(percent)
+            _log.info('{0}% done...'.format(percent))
+        elif message.type == Gst.MessageType.EOS:
             _log.info('Done')
             self.halt()
+        elif message.type == Gst.MessageType.ERROR:
+            _log.error(message.parse_error())
+            self.halt()
 
     def halt(self):
         if getattr(self, 'pipeline', False):
-            self.pipeline.set_state(gst.STATE_NULL)
+            self.pipeline.set_state(Gst.State.NULL)
             del self.pipeline
         _log.info('Quitting MainLoop gracefully...')
-        gobject.idle_add(self._loop.quit)
+        GObject.idle_add(self._loop.quit)
 
 if __name__ == '__main__':
     import sys
diff --git a/mediagoblin/media_types/video/transcoders.py b/mediagoblin/media_types/video/transcoders.py
index d53cabc6..20f21697 100644
--- a/mediagoblin/media_types/video/transcoders.py
+++ b/mediagoblin/media_types/video/transcoders.py
@@ -239,7 +239,6 @@ class VideoTranscoder(object):
 
         self.audioconvert = Gst.ElementFactory.make('audioconvert', 'audioconvert')
         self.pipeline.add(self.audioconvert)
-
         self.audiocapsfilter = Gst.ElementFactory.make('capsfilter',
                                                        'audiocapsfilter')
         audiocaps = Gst.Caps.new_empty()
@@ -288,8 +287,7 @@ class VideoTranscoder(object):
         self.capsfilter.link(self.vp8enc)
         self.vp8enc.link(self.webmmux)
 
-        if self.data.is_audio:
-            # Link all the audio elements in a row to webmmux
+        if self.data.get_audio_streams():
             self.audioqueue.link(self.audiorate)
             self.audiorate.link(self.audioconvert)
             self.audioconvert.link(self.audiocapsfilter)
@@ -310,6 +308,7 @@ class VideoTranscoder(object):
         if (self.videorate.get_static_pad('sink').get_pad_template()
                 .get_caps().intersect(pad.query_caps()).is_empty()):
             # It is NOT a video src pad.
+            _log.debug('linking audio to the pad dynamically')
             pad.link(self.audioqueue.get_static_pad('sink'))
         else:
             # It IS a video src pad.
diff --git a/mediagoblin/tests/test_audio.py b/mediagoblin/tests/test_audio.py
new file mode 100644
index 00000000..740d9cdd
--- /dev/null
+++ b/mediagoblin/tests/test_audio.py
@@ -0,0 +1,104 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2013 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import tempfile
+import shutil
+import os
+import pytest
+from contextlib import contextmanager
+import logging
+import imghdr
+
+#os.environ['GST_DEBUG'] = '4,python:4'
+
+#TODO: this should be skipped if video plugin is not enabled
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import Gst
+Gst.init(None)
+
+from mediagoblin.media_types.audio.transcoders import (AudioTranscoder,
+        AudioThumbnailer)
+from mediagoblin.media_types.tools import discover
+
+
+@contextmanager
+def create_audio():
+    audio = tempfile.NamedTemporaryFile()
+    src = Gst.ElementFactory.make('audiotestsrc', None)
+    src.set_property('num-buffers', 50)
+    enc = Gst.ElementFactory.make('flacenc', None)
+    dst = Gst.ElementFactory.make('filesink', None)
+    dst.set_property('location', audio.name)
+    pipeline = Gst.Pipeline()
+    pipeline.add(src)
+    pipeline.add(enc)
+    pipeline.add(dst)
+    src.link(enc)
+    enc.link(dst)
+    pipeline.set_state(Gst.State.PLAYING)
+    state = pipeline.get_state(3 * Gst.SECOND)
+    assert state[0] == Gst.StateChangeReturn.SUCCESS
+    bus = pipeline.get_bus()
+    bus.timed_pop_filtered(
+            3 * Gst.SECOND,
+            Gst.MessageType.ERROR | Gst.MessageType.EOS)
+    pipeline.set_state(Gst.State.NULL)
+    yield (audio.name)
+
+
+@contextmanager
+def create_data_for_test():
+    with create_audio() as audio_name:
+        second_file = tempfile.NamedTemporaryFile()
+        yield (audio_name, second_file.name)
+
+
+def test_transcoder():
+    '''
+    Tests AudioTranscoder's transcode method
+    '''
+    transcoder = AudioTranscoder()
+    with create_data_for_test() as (audio_name, result_name):
+        transcoder.transcode(audio_name, result_name, quality=0.3,
+                             progress_callback=None)
+        info = discover(result_name)
+        assert len(info.get_audio_streams()) == 1
+        transcoder.transcode(audio_name, result_name, quality=0.3,
+                             mux_name='oggmux', progress_callback=None)
+        info = discover(result_name)
+        assert len(info.get_audio_streams()) == 1
+
+
+def test_thumbnails():
+    '''Test thumbnails generation.
+
+    The code below heavily repeats
+    audio.processing.CommonAudioProcessor.create_spectrogram
+    1. Create test audio
+    2. Convert it to OGG source for spectrogram using transcoder
+    3. Create spectrogram in jpg
+
+    '''
+    thumbnailer = AudioThumbnailer()
+    transcoder = AudioTranscoder()
+    with create_data_for_test() as (audio_name, new_name):
+        transcoder.transcode(audio_name, new_name, mux_name='oggmux')
+        thumbnail = tempfile.NamedTemporaryFile(suffix='.jpg')
+        # fft_size below is copypasted from config_spec.ini
+        thumbnailer.spectrogram(new_name, thumbnail.name, width=100,
+                                fft_size=4096)
+        assert imghdr.what(thumbnail.name) == 'jpeg'

From 919cd2fd4101e01a5295cb08ce0947e47fc425bb Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Mon, 16 Feb 2015 05:05:41 +0300
Subject: [PATCH 04/13] added docs about new gstreamer

---
 docs/source/siteadmin/media-types.rst | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/docs/source/siteadmin/media-types.rst b/docs/source/siteadmin/media-types.rst
index f8030081..b8157e59 100644
--- a/docs/source/siteadmin/media-types.rst
+++ b/docs/source/siteadmin/media-types.rst
@@ -79,12 +79,14 @@ good/bad/ugly).  On Debianoid systems
 
 .. code-block:: bash
 
-    sudo apt-get install python-gst0.10 \
-        gstreamer0.10-plugins-base \
-        gstreamer0.10-plugins-bad \
-        gstreamer0.10-plugins-good \
-        gstreamer0.10-plugins-ugly \
-        gstreamer0.10-ffmpeg
+    sudo apt-get install python-gi python3-gi \
+        gstreamer1.0-tools \
+        gir1.2-gstreamer-1.0 \
+        gir1.2-gst-plugins-base-1.0 \
+        gstreamer1.0-plugins-good \
+        gstreamer1.0-plugins-ugly \
+        gstreamer1.0-plugins-bad \
+        gstreamer1.0-libav
 
 
 Add ``[[mediagoblin.media_types.video]]`` under the ``[plugins]`` section in
@@ -206,7 +208,7 @@ It may work on some earlier versions, but that is not guaranteed (and
 is surely not to work prior to Blender 2.5X).
 
 Add ``[[mediagoblin.media_types.stl]]`` under the ``[plugins]`` section in your
-``mediagoblin_local.ini`` and restart MediaGoblin. 
+``mediagoblin_local.ini`` and restart MediaGoblin.
 
 Run
 
@@ -255,7 +257,7 @@ This feature has been tested on Fedora with:
 It may work on some earlier versions, but that is not guaranteed.
 
 Add ``[[mediagoblin.media_types.pdf]]`` under the ``[plugins]`` section in your
-``mediagoblin_local.ini`` and restart MediaGoblin. 
+``mediagoblin_local.ini`` and restart MediaGoblin.
 
 Run
 

From c41705bffc2bcee3e283f39f2c479f8c852e6b8b Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Fri, 20 Jun 2014 07:18:33 +0400
Subject: [PATCH 05/13] added a/v submission testing

---
 mediagoblin/tests/media_tools.py       | 61 ++++++++++++++++++++++++++
 mediagoblin/tests/test_mgoblin_app.ini |  2 +
 mediagoblin/tests/test_submission.py   | 19 ++++++++
 3 files changed, 82 insertions(+)
 create mode 100644 mediagoblin/tests/media_tools.py

diff --git a/mediagoblin/tests/media_tools.py b/mediagoblin/tests/media_tools.py
new file mode 100644
index 00000000..8d58c024
--- /dev/null
+++ b/mediagoblin/tests/media_tools.py
@@ -0,0 +1,61 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2013 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from contextlib import contextmanager
+import tempfile
+
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import Gst
+Gst.init(None)
+
+@contextmanager
+def create_av(make_video=False, make_audio=False):
+    '''Yield the path of a temporary Ogg file with test video and/or audio.
+
+    Raises AssertionError if linking, the state change or reaching EOS fails.'''
+    media = tempfile.NamedTemporaryFile(suffix='.ogg')
+    pipeline = Gst.Pipeline()
+    mux = Gst.ElementFactory.make('oggmux', 'mux')
+    pipeline.add(mux)
+    if make_video:
+        video_src = Gst.ElementFactory.make('videotestsrc', 'video_src')
+        video_src.set_property('num-buffers', 20)
+        video_enc = Gst.ElementFactory.make('theoraenc', 'video_enc')
+        pipeline.add(video_src)
+        pipeline.add(video_enc)
+        assert video_src.link(video_enc)
+        assert video_enc.link(mux)
+    if make_audio:
+        audio_src = Gst.ElementFactory.make('audiotestsrc', 'audio_src')
+        audio_src.set_property('num-buffers', 20)
+        audio_enc = Gst.ElementFactory.make('vorbisenc', 'audio_enc')
+        pipeline.add(audio_src)
+        pipeline.add(audio_enc)
+        assert audio_src.link(audio_enc)
+        assert audio_enc.link(mux)
+    sink = Gst.ElementFactory.make('filesink', 'sink')
+    sink.set_property('location', media.name)
+    pipeline.add(sink)
+    assert mux.link(sink)
+    pipeline.set_state(Gst.State.PLAYING)
+    assert pipeline.get_state(Gst.SECOND)[0] == Gst.StateChangeReturn.SUCCESS
+    # 20 buffers encode well within 1s; a timeout yields None, caught below
+    message = pipeline.get_bus().timed_pop_filtered(
+            Gst.SECOND, Gst.MessageType.ERROR | Gst.MessageType.EOS)
+    assert message is not None and message.type == Gst.MessageType.EOS
+    pipeline.set_state(Gst.State.NULL)
+    yield media.name
diff --git a/mediagoblin/tests/test_mgoblin_app.ini b/mediagoblin/tests/test_mgoblin_app.ini
index 4cd3d9b6..6bc450cb 100644
--- a/mediagoblin/tests/test_mgoblin_app.ini
+++ b/mediagoblin/tests/test_mgoblin_app.ini
@@ -37,4 +37,6 @@ BROKER_URL = "sqlite:///%(here)s/test_user_dev/kombu.db"
 [[mediagoblin.plugins.basic_auth]]
 [[mediagoblin.plugins.openid]]
 [[mediagoblin.media_types.image]]
+[[mediagoblin.media_types.video]]
+[[mediagoblin.media_types.audio]]
 [[mediagoblin.media_types.pdf]]
diff --git a/mediagoblin/tests/test_submission.py b/mediagoblin/tests/test_submission.py
index 03d255fb..65c4b3a3 100644
--- a/mediagoblin/tests/test_submission.py
+++ b/mediagoblin/tests/test_submission.py
@@ -26,7 +26,14 @@ import pytest
 
 import six.moves.urllib.parse as urlparse
 
+# this gst initialization stuff is really required here
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import Gst
+Gst.init(None)
+
 from mediagoblin.tests.tools import fixture_add_user
+from .media_tools import create_av
 from mediagoblin import mg_globals
 from mediagoblin.db.models import MediaEntry, User
 from mediagoblin.db.base import Session
@@ -365,6 +372,18 @@ class TestSubmission:
         media = self.check_media(None, {"title": u"With GPS data"}, 1)
         assert media.get_location.position["latitude"] == 59.336666666666666
 
+    def test_audio(self):
+        with create_av(make_audio=True) as path:
+            self.check_normal_upload('Audio', path)
+
+    def test_video(self):
+        with create_av(make_video=True) as path:
+            self.check_normal_upload('Video', path)
+
+    def test_audio_and_video(self):
+        with create_av(make_audio=True, make_video=True) as path:
+            self.check_normal_upload('Audio and Video', path)
+
     def test_processing(self):
         public_store_dir = mg_globals.global_config[
             'storage:publicstore']['base_dir']

From 067ee131885c2c3df88cb75e1c41a33d6681752f Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Sun, 20 Jul 2014 19:33:18 +0400
Subject: [PATCH 06/13] updated function docs

---
 mediagoblin/processing/task.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mediagoblin/processing/task.py b/mediagoblin/processing/task.py
index 0c254767..5e0e772d 100644
--- a/mediagoblin/processing/task.py
+++ b/mediagoblin/processing/task.py
@@ -74,8 +74,11 @@ class ProcessMedia(celery.Task):
         Pass the media entry off to the appropriate processing function
         (for now just process_image...)
 
+        :param media_id: MediaEntry().id
         :param feed_url: The feed URL that the PuSH server needs to be
             updated for.
+        :param reprocess_action: What particular action should be run. For
+            example, 'initial'.
         :param reprocess: A dict containing all of the necessary reprocessing
             info for the media_type.
         """

From 54b4b28f84444a5b4e95eca0c2ca6429d52573c3 Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Sun, 27 Jul 2014 07:25:14 +0400
Subject: [PATCH 07/13] Add new hook for two-step media type checking

Previously, uploaded media files were checked by extension only. This led
to situations where a plugin claims to support files with a specific
extension but cannot actually process them, for example due to missing
codecs. Since the plugin reported that it supported the uploaded file
type, the upload was declared successful, but transcoding then failed.

The failures were not easy to debug.

This change adds a new hook that allows two-step checking of the
content. The hook returns a tuple of media type name, manager and a
callable sniffer that can be used to perform potentially expensive
checks of the content.

Also the change adds implementation of the hook for video.
---
 mediagoblin/media_types/__init__.py         | 116 +++++++++++++++-----
 mediagoblin/media_types/tools.py            |   8 +-
 mediagoblin/media_types/video/__init__.py   |  10 +-
 mediagoblin/media_types/video/processing.py |  36 ++++--
 mediagoblin/plugins/api/views.py            |  16 +--
 mediagoblin/submit/views.py                 |  17 +--
 6 files changed, 127 insertions(+), 76 deletions(-)

diff --git a/mediagoblin/media_types/__init__.py b/mediagoblin/media_types/__init__.py
index 2e392317..ab39fa36 100644
--- a/mediagoblin/media_types/__init__.py
+++ b/mediagoblin/media_types/__init__.py
@@ -23,10 +23,18 @@ from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
 
 _log = logging.getLogger(__name__)
 
+
 class FileTypeNotSupported(Exception):
     pass
 
-class InvalidFileType(Exception):
+
+class TypeNotFound(FileTypeNotSupported):
+    '''Raised if no mediagoblin plugin supporting this file type was found'''
+    pass
+
+
+class MissingComponents(FileTypeNotSupported):
+    '''Raised if plugin found, but it can't process the file for some reason'''
     pass
 
 
@@ -50,40 +58,30 @@ class MediaManagerBase(object):
         return hasattr(self, i)
 
 
-def sniff_media(media_file, filename):
+def sniff_media_contents(media_file, filename):
     '''
-    Iterate through the enabled media types and find those suited
-    for a certain file.
+    Check media contents using 'expensive' scanning. For example, for video it
+    is checking the contents using gstreamer
+    :param media_file: file-like object with 'name' attribute
+    :param filename: expected filename of the media
     '''
-
-    try:
-        return get_media_type_and_manager(filename)
-    except FileTypeNotSupported:
-        _log.info('No media handler found by file extension. Doing it the expensive way...')
-        # Create a temporary file for sniffers suchs as GStreamer-based
-        # Audio video
-        tmp_media_file = tempfile.NamedTemporaryFile()
-        tmp_media_file.write(media_file.read())
-        tmp_media_file.seek(0)
-        media_file.seek(0)
-
-        media_type = hook_handle('sniff_handler', tmp_media_file, filename)
-        if media_type:
-            _log.info('{0} accepts the file'.format(media_type))
-            return media_type, hook_handle(('media_manager', media_type))
-        else:
-            _log.debug('{0} did not accept the file'.format(media_type))
-
-    raise FileTypeNotSupported(
-        # TODO: Provide information on which file types are supported
-        _(u'Sorry, I don\'t support that file type :('))
-
+    media_type = hook_handle('sniff_handler', media_file, filename)
+    if media_type:
+        _log.info('{0} accepts the file'.format(media_type))
+        return media_type, hook_handle(('media_manager', media_type))
+    else:
+        _log.debug('{0} did not accept the file'.format(media_type))
+        raise FileTypeNotSupported(
+            # TODO: Provide information on which file types are supported
+            _(u'Sorry, I don\'t support that file type :('))
 
 def get_media_type_and_manager(filename):
     '''
     Try to find the media type based on the file name, extension
     specifically. This is used as a speedup, the sniffing functionality
     then falls back on more in-depth bitsniffing of the source file.
+
+    This hook is deprecated, 'type_match_handler' should be used instead
     '''
     if filename.find('.') > 0:
         # Get the file extension
@@ -97,5 +95,67 @@ def get_media_type_and_manager(filename):
         _log.info('File {0} has no file extension, let\'s hope the sniffers get it.'.format(
             filename))
 
-    raise FileTypeNotSupported(
+    raise TypeNotFound(
         _(u'Sorry, I don\'t support that file type :('))
+
+def type_match_handler(media_file, filename):
+    '''Check media file by name and then by content
+
+    Try to find the media type based on the file name, extension
+    specifically. After that, if media type is one of supported ones, check the
+    contents of the file
+    '''
+    if filename.find('.') > 0:
+        # Get the file extension
+        ext = os.path.splitext(filename)[1].lower()
+
+        # Omit the dot from the extension and match it against
+        # the media manager
+        hook_result = hook_handle('type_match_handler', ext[1:])
+        if hook_result:
+            _log.info('Info about file found, checking further')
+            MEDIA_TYPE, Manager, sniffer = hook_result
+            if not sniffer:
+                _log.debug('sniffer is None, plugin trusts the extension')
+                return MEDIA_TYPE, Manager
+            _log.info('checking the contents with sniffer')
+            try:
+                sniffer(media_file)
+                _log.info('checked, found')
+                return MEDIA_TYPE, Manager
+            except Exception as e:
+                _log.info('sniffer says it will not accept the file')
+                _log.debug(e)
+                raise
+        else:
+            _log.info('No plugins handled extension {0}'.format(ext))
+    else:
+        _log.info('File {0} has no known file extension, let\'s hope '
+                'the sniffers get it.'.format(filename))
+    raise TypeNotFound(_(u'Sorry, I don\'t support that file type :('))
+
+
+def sniff_media(media_file, filename):
+    '''
+    Iterate through the enabled media types and find those suited
+    for a certain file.
+    '''
+    # copy the contents to a .name-enabled temporary file for further checks
+    # TODO: there are cases when copying is not required
+    tmp_media_file = tempfile.NamedTemporaryFile()
+    media_file.save(tmp_media_file.name)
+    media_file.seek(0)
+    try:
+        return type_match_handler(tmp_media_file, filename)
+    except TypeNotFound as e:
+        _log.info('No plugins using two-step checking found')
+
+    # keep trying, using old `get_media_type_and_manager`
+    try:
+        return get_media_type_and_manager(filename)
+    except TypeNotFound as e:
+        # again, no luck. Do it expensive way
+        _log.info('No media handler found by file extension')
+    _log.info('Doing it the expensive way...')
+    return sniff_media_contents(tmp_media_file, filename)
+
diff --git a/mediagoblin/media_types/tools.py b/mediagoblin/media_types/tools.py
index 0822f51c..c3b3a3f0 100644
--- a/mediagoblin/media_types/tools.py
+++ b/mediagoblin/media_types/tools.py
@@ -39,10 +39,4 @@ def discover(src):
     _log.info('Discovering {0}...'.format(src))
     uri = 'file://{0}'.format(src)
     discoverer = GstPbutils.Discoverer.new(60 * Gst.SECOND)
-    try:
-        info = discoverer.discover_uri(uri)
-    except GLib.GError as e:
-        _log.warning(u'Exception: {0}'.format(e))
-        info = None
-    _log.info('Done')
-    return info
+    return discoverer.discover_uri(uri)
diff --git a/mediagoblin/media_types/video/__init__.py b/mediagoblin/media_types/video/__init__.py
index c85cc0b2..f636f1ab 100644
--- a/mediagoblin/media_types/video/__init__.py
+++ b/mediagoblin/media_types/video/__init__.py
@@ -15,8 +15,8 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from mediagoblin.media_types import MediaManagerBase
-from mediagoblin.media_types.video.processing import VideoProcessingManager, \
-    sniff_handler
+from mediagoblin.media_types.video.processing import (VideoProcessingManager,
+        sniff_handler, sniffer)
 
 
 MEDIA_TYPE = 'mediagoblin.media_types.video'
@@ -38,8 +38,12 @@ def get_media_type_and_manager(ext):
     if ext in ACCEPTED_EXTENSIONS:
         return MEDIA_TYPE, VideoMediaManager
 
+def type_match_handler(ext):
+    if ext in ACCEPTED_EXTENSIONS:
+        return MEDIA_TYPE, VideoMediaManager, sniffer
+
 hooks = {
-    'get_media_type_and_manager': get_media_type_and_manager,
+    'type_match_handler': type_match_handler,
     'sniff_handler': sniff_handler,
     ('media_manager', MEDIA_TYPE): lambda: VideoMediaManager,
     ('reprocess_manager', MEDIA_TYPE): lambda: VideoProcessingManager,
diff --git a/mediagoblin/media_types/video/processing.py b/mediagoblin/media_types/video/processing.py
index 588af282..bd4c09d0 100644
--- a/mediagoblin/media_types/video/processing.py
+++ b/mediagoblin/media_types/video/processing.py
@@ -27,6 +27,7 @@ from mediagoblin.processing import (
     get_process_filename, store_public,
     copy_original)
 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
+from mediagoblin.media_types import MissingComponents
 
 from . import transcoders
 from .util import skip_transcode
@@ -44,24 +45,34 @@ class VideoTranscodingFail(BaseProcessingFail):
     general_message = _(u'Video transcoding failed')
 
 
-EXCLUDED_EXTS = ["nef", "cr2"]
-
-def sniff_handler(media_file, filename):
-    data = transcoders.discover(media_file.name)
-
+def sniffer(media_file):
+    '''New style sniffer, used in two-steps check; requires to have .name'''
     _log.info('Sniffing {0}'.format(MEDIA_TYPE))
+    try:
+        data = transcoders.discover(media_file.name)
+    except Exception as e:
+        # this is usually GLib.GError, but we don't really care which one
+        _log.debug(u'GStreamer: {0}'.format(unicode(e)))
+        raise MissingComponents(u'GStreamer: {0}'.format(unicode(e)))
     _log.debug('Discovered: {0}'.format(data))
 
-    if not data:
+    if not data.get_video_streams():
+        raise MissingComponents('No video streams found in this video')
+
+    if data.get_result() != 0:  # it's 0 if success
+        name = data.get_misc().get_string('name')  # XXX: is there always name?
+        raise MissingComponents(u'{0} is missing'.format(name))
+
+    return MEDIA_TYPE
+
+
+def sniff_handler(media_file, filename):  # returns MEDIA_TYPE or None
+    try:
+        return sniffer(media_file)
+    except Exception:  # not bare: let KeyboardInterrupt/SystemExit through
         _log.error('Could not discover {0}'.format(filename))
         return None
 
-    if data.get_video_streams():
-        return MEDIA_TYPE
-
-    return None
-
-
 def store_metadata(media_entry, metadata):
     """
     Store metadata from this video for this media entry.
@@ -212,6 +223,7 @@ class CommonVideoProcessor(MediaProcessor):
 
         # Extract metadata and keep a record of it
         metadata = transcoders.discover(self.process_filename)
+
         # metadata's stream info here is a DiscovererContainerInfo instance,
         # it gets split into DiscovererAudioInfo and DiscovererVideoInfo;
         # metadata itself has container-related data in tags, like video-codec
diff --git a/mediagoblin/plugins/api/views.py b/mediagoblin/plugins/api/views.py
index ef0b87e3..23341065 100644
--- a/mediagoblin/plugins/api/views.py
+++ b/mediagoblin/plugins/api/views.py
@@ -26,8 +26,7 @@ from mediagoblin.tools.translate import pass_to_ugettext as _
 from mediagoblin.tools.response import json_response
 from mediagoblin.decorators import require_active_login
 from mediagoblin.meddleware.csrf import csrf_exempt
-from mediagoblin.media_types import \
-    InvalidFileType, FileTypeNotSupported
+from mediagoblin.media_types import FileTypeNotSupported
 from mediagoblin.plugins.api.tools import api_auth, get_entry_serializable
 from mediagoblin.submit.lib import \
     check_file_field, submit_media, get_upload_file_limits, \
@@ -83,17 +82,8 @@ def post_entry(request):
     except UserPastUploadLimit:
         raise BadRequest(
             _('Sorry, you have reached your upload limit.'))
-
-    except Exception as e:
-        '''
-        This section is intended to catch exceptions raised in
-        mediagoblin.media_types
-        '''
-        if isinstance(e, InvalidFileType) or \
-                isinstance(e, FileTypeNotSupported):
-            raise BadRequest(six.text_type(e))
-        else:
-            raise
+    except FileTypeNotSupported as e:
+        raise BadRequest(e)
 
 
 @api_auth
diff --git a/mediagoblin/submit/views.py b/mediagoblin/submit/views.py
index b0588599..ccdd70bc 100644
--- a/mediagoblin/submit/views.py
+++ b/mediagoblin/submit/views.py
@@ -29,8 +29,7 @@ from mediagoblin.tools.response import render_to_response, redirect
 from mediagoblin.decorators import require_active_login, user_has_privilege
 from mediagoblin.submit import forms as submit_forms
 from mediagoblin.messages import add_message, SUCCESS
-from mediagoblin.media_types import \
-    InvalidFileType, FileTypeNotSupported
+from mediagoblin.media_types import FileTypeNotSupported
 from mediagoblin.submit.lib import \
     check_file_field, submit_media, get_upload_file_limits, \
     FileUploadLimit, UserUploadLimit, UserPastUploadLimit
@@ -89,18 +88,10 @@ def submit_start(request):
                     _('Sorry, you have reached your upload limit.'))
                 return redirect(request, "mediagoblin.user_pages.user_home",
                                 user=request.user.username)
-
+            except FileTypeNotSupported as e:
+                submit_form.file.errors.append(e)
             except Exception as e:
-                '''
-                This section is intended to catch exceptions raised in
-                mediagoblin.media_types
-                '''
-                if isinstance(e, InvalidFileType) or \
-                        isinstance(e, FileTypeNotSupported):
-                    submit_form.file.errors.append(
-                        e)
-                else:
-                    raise
+                raise
 
     return render_to_response(
         request,

From 945a1c56677e3187bb0e3584d7e5a57e661f55ac Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Mon, 4 Aug 2014 11:59:37 +0400
Subject: [PATCH 08/13] a tiny fix to function doc

---
 mediagoblin/processing/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mediagoblin/processing/__init__.py b/mediagoblin/processing/__init__.py
index b7e36027..aa8f1447 100644
--- a/mediagoblin/processing/__init__.py
+++ b/mediagoblin/processing/__init__.py
@@ -309,8 +309,8 @@ def mark_entry_failed(entry_id, exc):
     store extra information that can be useful for users telling them
     why their media failed to process.
 
-    Args:
-     - entry_id: The id of the media entry
+    :param entry_id: The id of the media entry
+    :param exc: An instance of BaseProcessingFail
 
     """
     # Was this a BaseProcessingFail?  In other words, was this a

From 2d1e89055d52023a7600f35dd41564019f4d70c3 Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Fri, 8 Aug 2014 06:09:28 +0400
Subject: [PATCH 09/13] Added multiple a-v streams handling

Before, only files with a single video stream were supported.

This patch adds support for files with multiple video streams. Metadata
of such files is now correctly stored. This required change of the
schema used to store info and the change is done in a migration.
---
 mediagoblin/media_types/audio/processing.py |   6 +-
 mediagoblin/media_types/video/migrations.py |  61 +++++++++++
 mediagoblin/media_types/video/models.py     |  15 ++-
 mediagoblin/media_types/video/processing.py | 113 +++++++++++---------
 mediagoblin/media_types/video/util.py       |  30 +++---
 5 files changed, 151 insertions(+), 74 deletions(-)

diff --git a/mediagoblin/media_types/audio/processing.py b/mediagoblin/media_types/audio/processing.py
index 770342ff..c1f6cb6b 100644
--- a/mediagoblin/media_types/audio/processing.py
+++ b/mediagoblin/media_types/audio/processing.py
@@ -36,7 +36,11 @@ MEDIA_TYPE = 'mediagoblin.media_types.audio'
 
 def sniff_handler(media_file, filename):
     _log.info('Sniffing {0}'.format(MEDIA_TYPE))
-    data = discover(media_file.name)
+    try:
+        data = discover(media_file.name)
+    except Exception as e:
+        _log.info(unicode(e))
+        return None
     if data and data.get_audio_streams() and not data.get_video_streams():
         return MEDIA_TYPE
     return None
diff --git a/mediagoblin/media_types/video/migrations.py b/mediagoblin/media_types/video/migrations.py
index d68e2933..8088220b 100644
--- a/mediagoblin/media_types/video/migrations.py
+++ b/mediagoblin/media_types/video/migrations.py
@@ -18,6 +18,8 @@ from mediagoblin.db.migration_tools import RegisterMigration, inspect_table
 
 from sqlalchemy import MetaData, Column, Unicode
 
+import json
+
 MIGRATIONS = {}
 
 
@@ -47,3 +49,62 @@ def webm_640_to_webm_video(db):
                 values(name='webm_video'))
 
     db.commit()
+
+
+@RegisterMigration(3, MIGRATIONS)
+def change_metadata_format(db):
+    """Change orig_metadata format for multi-stream a-v
+
+    Before this migration orig_metadata held info about only one video
+    and one audio stream in a flat dict. The existing values are stored as
+    the first item of the new 'video' and 'audio' lists; 'length' and the
+    remaining tags go under 'common'. Rows without metadata are skipped.
+    """
+    video_key_map = {  # old: new
+        'videoheight': 'height',
+        'videowidth': 'width',
+        'videorate': 'rate',
+    }
+    audio_key_map = {'audiochannels': 'channels'}  # old: new
+    common_key_map = {'videolength': 'length'}  # old: new
+    video_tags = ('encoder', 'encoder-version', 'video-codec')
+    audio_tags = ('audio-codec',)
+
+    def remap(old, key_map):
+        'copies truthy values of old to a new dict under their new keys'
+        return dict((new_key, old[old_key])
+                    for old_key, new_key in key_map.items()
+                    if old.get(old_key))
+
+    db_metadata = MetaData(bind=db.bind)
+    vid_data = inspect_table(db_metadata, "video__mediadata")
+
+    for row in db.execute(vid_data.select()):
+        # a NULL/empty column would make json.loads raise, aborting the run
+        if not row.orig_metadata:
+            continue
+        metadata = json.loads(row.orig_metadata)
+        if not metadata:
+            continue
+
+        video = remap(metadata, video_key_map)
+        audio = remap(metadata, audio_key_map)
+        common = remap(metadata, common_key_map)
+        # 'mimetype' should be in tags
+        common['tags'] = {'mimetype': metadata.get('mimetype')}
+        if 'tags' in metadata:
+            video['tags'] = {}
+            audio['tags'] = {}
+            for tag, value in metadata['tags'].items():
+                if tag in video_tags:
+                    video['tags'][tag] = value
+                elif tag in audio_tags:
+                    audio['tags'][tag] = value
+                else:
+                    common['tags'][tag] = value
+
+        new_metadata = {'audio': [audio], 'video': [video], 'common': common}
+        db.execute(vid_data.update()
+                   .where(vid_data.c.media_entry == row.media_entry)
+                   .values(orig_metadata=json.dumps(new_metadata)))
+    db.commit()
diff --git a/mediagoblin/media_types/video/models.py b/mediagoblin/media_types/video/models.py
index be9d258f..4d539e7b 100644
--- a/mediagoblin/media_types/video/models.py
+++ b/mediagoblin/media_types/video/models.py
@@ -68,19 +68,18 @@ class VideoData(Base):
         """
         orig_metadata = self.orig_metadata or {}
 
-        if "webm_video" not in self.get_media_entry.media_files \
-           and "mimetype" in orig_metadata \
-           and "tags" in orig_metadata \
-           and "audio-codec" in orig_metadata["tags"] \
-           and "video-codec" in orig_metadata["tags"]:
+        if ("webm_video" not in self.get_media_entry.media_files
+           and "mimetype" in orig_metadata['common']['tags']
+           and "codec" in orig_metadata['audio']
+           and "codec" in orig_metadata['video']):
             if orig_metadata['mimetype'] == 'application/ogg':
                 # stupid ambiguous .ogg extension
                 mimetype = "video/ogg"
             else:
-                mimetype = orig_metadata['mimetype']
+                mimetype = orig_metadata['common']['tags']['mimetype']
 
-            video_codec = orig_metadata["tags"]["video-codec"].lower()
-            audio_codec = orig_metadata["tags"]["audio-codec"].lower()
+            video_codec = orig_metadata["video"]["codec"].lower()
+            audio_codec = orig_metadata["audio"]["codec"].lower()
 
             # We don't want the "video" at the end of vp8...
             # not sure of a nicer way to be cleaning this stuff
diff --git a/mediagoblin/media_types/video/processing.py b/mediagoblin/media_types/video/processing.py
index bd4c09d0..0c897495 100644
--- a/mediagoblin/media_types/video/processing.py
+++ b/mediagoblin/media_types/video/processing.py
@@ -73,6 +73,37 @@ def sniff_handler(media_file, filename):
         _log.error('Could not discover {0}'.format(filename))
         return None
 
+def get_tags(stream_info):
+    """Return the tags of ``stream_info`` as a JSON-serializable dict.
+
+    Only the first value of each tag is kept. 'date'/'datetime' values
+    become strings; values of types json cannot encode are dropped.
+    """
+    taglist = stream_info.get_tags()
+    if not taglist:
+        return {}
+    tags = {}
+
+    def collect(tag_list, tag):
+        tags[tag] = tag_list.get_value_index(tag, 0)
+    taglist.foreach(collect)
+
+    # date/datetime should be converted from GDate/GDateTime to strings
+    if 'date' in tags:
+        date = tags['date']
+        tags['date'] = "%s-%s-%s" % (date.year, date.month, date.day)
+    if 'datetime' in tags:
+        # TODO: handle timezone info; gst.get_time_zone_offset +
+        # python's tzinfo should help
+        dt = tags['datetime']
+        tags['datetime'] = datetime.datetime(
+            dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(),
+            dt.get_minute(), dt.get_second(),
+            dt.get_microsecond()).isoformat()
+    # filter into a new dict instead of deleting keys while iterating
+    json_types = (dict, list, basestring, int, float, bool, type(None))
+    return dict((k, v) for k, v in tags.items() if isinstance(v, json_types))
+
 def store_metadata(media_entry, metadata):
     """
     Store metadata from this video for this media entry.
@@ -80,59 +111,40 @@ def store_metadata(media_entry, metadata):
     stored_metadata = dict()
     audio_info_list = metadata.get_audio_streams()
     if audio_info_list:
-        audio_info = audio_info_list[0]
-        stored_metadata['audiochannels'] = audio_info.get_channels()
-    # video is always there
-    video_info = metadata.get_video_streams()[0]
-    # Let's pull out the easy, not having to be converted ones first
-    stored_metadata = dict()
-    audio_info_list = metadata.get_audio_streams()
-    if audio_info:
-        audio_info = audio_info_list[0]
-        stored_metadata['audiochannels'] = audio_info.get_channels()
-    # video is always there
-    video_info = metadata.get_video_streams()[0]
-    # Let's pull out the easy, not having to be converted ones first
-    stored_metadata['videoheight'] = video_info.get_height()
-    stored_metadata['videowidth'] = video_info.get_width()
-    stored_metadata['videolength'] = metadata.get_duration()
-    stored_metadata['mimetype'] = metadata.get_tags().get_string('mimetype')
-    # We have to convert videorate into a sequence because it's a
-    # special type normally..
-    stored_metadata['videorate'] = [video_info.get_framerate_num(),
-                                   video_info.get_framerate_denom()]
+        stored_metadata['audio'] = []
+    for audio_info in audio_info_list:
+        stored_metadata['audio'].append(
+                {
+                    'channels': audio_info.get_channels(),
+                    'bitrate': audio_info.get_bitrate(),
+                    'depth': audio_info.get_depth(),
+                    'languange': audio_info.get_language(),
+                    'sample_rate': audio_info.get_sample_rate(),
+                    'tags': get_tags(audio_info)
+                })
 
-    if metadata.get_tags():
-        tags_metadata = metadata.get_tags()
-        # we don't use *all* of these, but we know these ones are
-        # safe...
-        # get_string returns (success, value) tuple
-        tags = dict(
-            [(key, tags_metadata.get_string(key)[1])
-             for key in [
-                 "application-name", "artist", "audio-codec", "bitrate",
-                 "container-format", "copyright", "encoder",
-                 "encoder-version", "license", "nominal-bitrate", "title",
-                 "video-codec"]
-             if tags_metadata.get_string(key)[0]])
-        (success, date) = tags_metadata.get_date('date')
-        if success:
-            tags['date'] = "%s-%s-%s" % (
-                date.year, date.month, date.day)
+    video_info_list = metadata.get_video_streams()
+    if video_info_list:
+        stored_metadata['video'] = []
+    for video_info in video_info_list:
+        stored_metadata['video'].append(
+                {
+                    'width': video_info.get_width(),
+                    'height': video_info.get_height(),
+                    'bitrate': video_info.get_bitrate(),
+                    'depth': video_info.get_depth(),
+                    'videorate': [video_info.get_framerate_num(),
+                                  video_info.get_framerate_denom()],
+                    'tags': get_tags(video_info)
+                })
 
-        # TODO: handle timezone info; gst.get_time_zone_offset +
-        #   python's tzinfo should help
-        (success, dt) = tags_metadata.get_date_time('datetime')
-        if success:
-            tags['datetime'] = datetime.datetime(
-                dt.get_year(), dt.get_month(), dt.get_day(), dt.get_hour(),
-                dt.get_minute(), dt.get_second(),
-                dt.get_microsecond()).isoformat()
-        stored_metadata['tags'] = tags
+    stored_metadata['common'] = {
+        'duration': metadata.get_duration(),
+        'tags': get_tags(metadata),
+    }
     # Only save this field if there's something to save
     if len(stored_metadata):
-        media_entry.media_data_init(
-            orig_metadata=stored_metadata)
+        media_entry.media_data_init(orig_metadata=stored_metadata)
 
 
 class CommonVideoProcessor(MediaProcessor):
@@ -234,7 +246,8 @@ class CommonVideoProcessor(MediaProcessor):
         if skip_transcode(metadata, medium_size):
             _log.debug('Skipping transcoding')
 
-            dst_dimensions = metadata['videowidth'], metadata['videoheight']
+            dst_dimensions = (metadata.get_video_streams()[0].get_width(),
+                    metadata.get_video_streams()[0].get_height())
 
             # If there is an original and transcoded, delete the transcoded
             # since it must be of lower quality then the original
diff --git a/mediagoblin/media_types/video/util.py b/mediagoblin/media_types/video/util.py
index 4dc395b4..604702d7 100644
--- a/mediagoblin/media_types/video/util.py
+++ b/mediagoblin/media_types/video/util.py
@@ -43,23 +43,23 @@ def skip_transcode(metadata, size):
                 config['container_formats']):
             return False
 
-    if (config['video_codecs'] and
-            metadata.get_tags().get_string('video-codec')):
-        if not (metadata.get_tags().get_string('video-codec') in
-                config['video_codecs']):
-            return False
+    if config['video_codecs']:
+        for video_info in metadata.get_video_streams():
+            if not (video_info.get_tags().get_string('video-codec') in
+                    config['video_codecs']):
+                return False
 
-    if (config['audio_codecs'] and
-            metadata.get_tags().get_string('audio-codec')):
-        if not (metadata.get_tags().get_string('audio-codec') in
-                config['audio_codecs']):
-            return False
+    if config['audio_codecs']:
+        for audio_info in metadata.get_audio_streams():
+            if not (audio_info.get_tags().get_string('audio-codec') in
+                    config['audio_codecs']):
+                return False
 
-    video_info = metadata.get_video_streams()[0]
     if config['dimensions_match']:
-        if not video_info.get_height() <= size[1]:
-            return False
-        if not video_info.get_width() <= size[0]:
-            return False
+        for video_info in metadata.get_video_streams():
+            if not video_info.get_height() <= size[1]:
+                return False
+            if not video_info.get_width() <= size[0]:
+                return False
 
     return True

From fd365db43a01f8ebbcc91b95e192423296ca5f80 Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Mon, 11 Aug 2014 21:09:22 +0400
Subject: [PATCH 10/13] added tests skipping if there is no proper gstreamer

---
 mediagoblin/tests/test_audio.py | 2 +-
 mediagoblin/tests/test_video.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/mediagoblin/tests/test_audio.py b/mediagoblin/tests/test_audio.py
index 740d9cdd..62d582ff 100644
--- a/mediagoblin/tests/test_audio.py
+++ b/mediagoblin/tests/test_audio.py
@@ -24,7 +24,7 @@ import imghdr
 
 #os.environ['GST_DEBUG'] = '4,python:4'
 
-#TODO: this should be skipped if video plugin is not enabled
+pytest.importorskip("gi.repository.Gst")
 import gi
 gi.require_version('Gst', '1.0')
 from gi.repository import Gst
diff --git a/mediagoblin/tests/test_video.py b/mediagoblin/tests/test_video.py
index 03298b67..79244515 100644
--- a/mediagoblin/tests/test_video.py
+++ b/mediagoblin/tests/test_video.py
@@ -20,8 +20,9 @@ from contextlib import contextmanager
 import imghdr
 
 #os.environ['GST_DEBUG'] = '4,python:4'
+import pytest
+pytest.importorskip("gi.repository.Gst")
 
-#TODO: this should be skipped if video plugin is not enabled
 import gi
 gi.require_version('Gst', '1.0')
 from gi.repository import Gst

From 9834c876b736614a8e1be835e6b21ea04b77bd03 Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Mon, 18 Aug 2014 05:09:53 +0300
Subject: [PATCH 11/13] added missing import to video transcoding

---
 mediagoblin/media_types/video/transcoders.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mediagoblin/media_types/video/transcoders.py b/mediagoblin/media_types/video/transcoders.py
index 20f21697..3c060fd7 100644
--- a/mediagoblin/media_types/video/transcoders.py
+++ b/mediagoblin/media_types/video/transcoders.py
@@ -20,7 +20,9 @@ import os
 import sys
 import logging
 import multiprocessing
+
 from mediagoblin.media_types.tools import discover
+from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
 
 #os.environ['GST_DEBUG'] = '4,python:4'
 

From 6e4eccb1d4da539015237cfce0306ac52da04f5f Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Wed, 18 Feb 2015 02:05:57 +0300
Subject: [PATCH 12/13] Missing codecs processing with gst 1.4

Gst 1.4 deprecated DiscovererInfo.get_misc in favour of
DiscovererInfo.get_missing_elements_installer_details. In the Python
bindings, however, get_misc appears to be broken rather than merely
deprecated.

This change makes the sniffer fall back to the new method when get_misc
fails.
---
 mediagoblin/media_types/video/processing.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/mediagoblin/media_types/video/processing.py b/mediagoblin/media_types/video/processing.py
index 0c897495..bf195222 100644
--- a/mediagoblin/media_types/video/processing.py
+++ b/mediagoblin/media_types/video/processing.py
@@ -52,7 +52,7 @@ def sniffer(media_file):
         data = transcoders.discover(media_file.name)
     except Exception as e:
         # this is usually GLib.GError, but we don't really care which one
-        _log.debug(u'GStreamer: {0}'.format(unicode(e)))
+        _log.warning(u'GStreamer: {0}'.format(unicode(e)))
         raise MissingComponents(u'GStreamer: {0}'.format(unicode(e)))
     _log.debug('Discovered: {0}'.format(data))
 
@@ -60,8 +60,19 @@ def sniffer(media_file):
         raise MissingComponents('No video streams found in this video')
 
     if data.get_result() != 0:  # it's 0 if success
-        name = data.get_misc().get_string('name')  # XXX: is there always name?
-        raise MissingComponents(u'{0} is missing'.format(name))
+        try:
+            missing = data.get_misc().get_string('name')
+            _log.warning('GStreamer: missing {0}'.format(missing))
+        except AttributeError as e:
+            # AttributeError happens here on gstreamer >1.4, when get_misc
+            # returns None. There is a special function to get info about
+            # missing plugins. This info should be printed to logs for the
+            # admin and shown to the user in a short and nice version
+            details = data.get_missing_elements_installer_details()
+            _log.warning('GStreamer: missing: {0}'.format(', '.join(details)))
+            missing = u', '.join([u'{0} ({1})'.format(*d.split('|')[3:])
+                                  for d in details])
+        raise MissingComponents(u'{0} is missing'.format(missing))
 
     return MEDIA_TYPE
 

From eade529e189ee688ba125c7a7e26e8cb8ed6aedd Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Wed, 18 Feb 2015 23:25:41 +0300
Subject: [PATCH 13/13] added missing package to deployment instructions

---
 docs/source/siteadmin/media-types.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/siteadmin/media-types.rst b/docs/source/siteadmin/media-types.rst
index b8157e59..7d9f72b0 100644
--- a/docs/source/siteadmin/media-types.rst
+++ b/docs/source/siteadmin/media-types.rst
@@ -86,7 +86,8 @@ good/bad/ugly).  On Debianoid systems
         gstreamer1.0-plugins-good \
         gstreamer1.0-plugins-ugly \
         gstreamer1.0-plugins-bad \
-        gstreamer1.0-libav
+        gstreamer1.0-libav \
+        python-gst-1.0
 
 
 Add ``[[mediagoblin.media_types.video]]`` under the ``[plugins]`` section in