Reinstate audio spectrograms on Python 3 [#5610].

The `audiotospectrogram` module is a complete rewrite of the existing spectrogram code with support for Python 3. This allows us to drop the bundled `freesound` library and Python 2-only `audioprocessing` and `spectrogram` modules. Signed-off-by: Ben Sturmfels <ben@sturm.com.au>
2021-03-03 22:16:37 +11:00
parent 83429a8658
commit c2e93da0ce
11 changed files with 311 additions and 1042 deletions
--- a/3
+++ b/3
@@ -81,7 +81,8 @@ gstreamer1.0-plugins-bad \
 gstreamer1.0-plugins-base \
 gstreamer1.0-plugins-good \
 gstreamer1.0-plugins-ugly \
-python3-gst-1.0
+python3-gst-1.0 \
+python3-numpy

 # Install video dependencies.
 RUN apt-get install -y \
--- a/1
+++ b/1
@@ -43,6 +43,7 @@ which
 # gstreamer1.0-plugins-good \
 # gstreamer1.0-plugins-ugly \
 # python3-gst-1.0 \
+# python3-numpy

 # RUN apt-get install -y \
 # gir1.2-gst-plugins-base-1.0 \
--- a/docs/source/siteadmin/media-types.rst
+++ b/docs/source/siteadmin/media-types.rst
@@ -92,10 +92,11 @@ as whatever GStreamer plugins you want, good/bad/ugly):

    # Debian and co.
    sudo apt install python3-gst-1.0 gstreamer1.0-plugins-{base,bad,good,ugly} \
-    gstreamer1.0-libav
+    gstreamer1.0-libav python3-numpy

    # Fedora and co.
-    sudo dnf install gstreamer1-plugins-{base,bad-free,good,ugly-free}
+    sudo dnf install gstreamer1-plugins-{base,bad-free,good,ugly-free} \
+    python3-numpy

 Add ``[[mediagoblin.media_types.audio]]`` under the ``[plugins]`` section in your
 ``mediagoblin.ini`` and update MediaGoblin::
--- a/docs/source/siteadmin/relnotes.rst
+++ b/docs/source/siteadmin/relnotes.rst
@@ -30,6 +30,7 @@ carefully, or at least skim over it.
 **Improvements:**

 - Drop Python 2 installation support (Ben Sturmfels)
+- Reinstate Python 3 audio spectrograms [#5610] (Fernando Gutierrez)

 **Bug fixes:**

--- a/extlib/freesound/audioprocessing.py
+++ b/extlib/freesound/audioprocessing.py
@@ -1,616 +0,0 @@
-#!/usr/bin/env python
-# processing.py -- various audio processing functions
-# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
-#                    UNIVERSITAT POMPEU FABRA
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-# Authors:
-#   Bram de Jong <bram.dejong at domain.com where domain in gmail>
-#   2012, Joar Wandborg <first name at last name dot se>
-
-from PIL import Image, ImageDraw, ImageColor #@UnresolvedImport
-from functools import partial
-import math
-import numpy
-import os
-import re
-import signal
-
-
-def get_sound_type(input_filename):
-    sound_type = os.path.splitext(input_filename.lower())[1].strip(".")
-
-    if sound_type == "fla":
-        sound_type = "flac"
-    elif sound_type == "aif":
-        sound_type = "aiff"
-
-    return sound_type
-
-
-try:
-    import scikits.audiolab as audiolab
-except ImportError:
-    print "WARNING: audiolab is not installed so wav2png will not work"
-import subprocess
-
-class AudioProcessingException(Exception):
-    pass
-
-class TestAudioFile(object):
-    """A class that mimics audiolab.sndfile but generates noise instead of reading
-    a wave file. Additionally it can be told to have a "broken" header and thus crashing
-    in the middle of the file. Also useful for testing ultra-short files of 20 samples."""
-    def __init__(self, num_frames, has_broken_header=False):
-        self.seekpoint = 0
-        self.nframes = num_frames
-        self.samplerate = 44100
-        self.channels = 1
-        self.has_broken_header = has_broken_header
-
-    def seek(self, seekpoint):
-        self.seekpoint = seekpoint
-
-    def read_frames(self, frames_to_read):
-        if self.has_broken_header and self.seekpoint + frames_to_read > self.num_frames / 2:
-            raise RuntimeError()
-
-        num_frames_left = self.num_frames - self.seekpoint
-        will_read = num_frames_left if num_frames_left < frames_to_read else frames_to_read
-        self.seekpoint += will_read
-        return numpy.random.random(will_read)*2 - 1
-
-
-def get_max_level(filename):
-    max_value = 0
-    buffer_size = 4096
-    audio_file = audiolab.Sndfile(filename, 'r')
-    n_samples_left = audio_file.nframes
-
-    while n_samples_left:
-        to_read = min(buffer_size, n_samples_left)
-
-        try:
-            samples = audio_file.read_frames(to_read)
-        except RuntimeError:
-            # this can happen with a broken header
-            break
-
-        # convert to mono by selecting left channel only
-        if audio_file.channels > 1:
-            samples = samples[:,0]
-
-        max_value = max(max_value, numpy.abs(samples).max())
-
-        n_samples_left -= to_read
-
-    audio_file.close()
-
-    return max_value
-
-class AudioProcessor(object):
-    """
-    The audio processor processes chunks of audio an calculates the spectrac centroid and the peak
-    samples in that chunk of audio.
-    """
-    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
-        max_level = get_max_level(input_filename)
-
-        self.audio_file = audiolab.Sndfile(input_filename, 'r')
-        self.fft_size = fft_size
-        self.window = window_function(self.fft_size)
-        self.spectrum_range = None
-        self.lower = 100
-        self.higher = 22050
-        self.lower_log = math.log10(self.lower)
-        self.higher_log = math.log10(self.higher)
-        self.clip = lambda val, low, high: min(high, max(low, val))
-
-        # figure out what the maximum value is for an FFT doing the FFT of a DC signal
-        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
-        max_fft = (numpy.abs(fft)).max()
-        # set the scale to normalized audio and normalized FFT
-        self.scale = 1.0/max_level/max_fft if max_level > 0 else 1
-
-    def read(self, start, size, resize_if_less=False):
-        """ read size samples starting at start, if resize_if_less is True and less than size
-        samples are read, resize the array to size and fill with zeros """
-
-        # number of zeros to add to start and end of the buffer
-        add_to_start = 0
-        add_to_end = 0
-
-        if start < 0:
-            # the first FFT window starts centered around zero
-            if size + start <= 0:
-                return numpy.zeros(size) if resize_if_less else numpy.array([])
-            else:
-                self.audio_file.seek(0)
-
-                add_to_start = -start # remember: start is negative!
-                to_read = size + start
-
-                if to_read > self.audio_file.nframes:
-                    add_to_end = to_read - self.audio_file.nframes
-                    to_read = self.audio_file.nframes
-        else:
-            self.audio_file.seek(start)
-
-            to_read = size
-            if start + to_read >= self.audio_file.nframes:
-                to_read = self.audio_file.nframes - start
-                add_to_end = size - to_read
-
-        try:
-            samples = self.audio_file.read_frames(to_read)
-        except RuntimeError:
-            # this can happen for wave files with broken headers...
-            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)
-
-        # convert to mono by selecting left channel only
-        if self.audio_file.channels > 1:
-            samples = samples[:,0]
-
-        if resize_if_less and (add_to_start > 0 or add_to_end > 0):
-            if add_to_start > 0:
-                samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1)
-
-            if add_to_end > 0:
-                samples = numpy.resize(samples, size)
-                samples[size - add_to_end:] = 0
-
-        return samples
-
-
-    def spectral_centroid(self, seek_point, spec_range=110.0):
-        """ starting at seek_point read fft_size samples, and calculate the spectral centroid """
-
-        samples = self.read(seek_point - self.fft_size/2, self.fft_size, True)
-
-        samples *= self.window
-        fft = numpy.fft.rfft(samples)
-        spectrum = self.scale * numpy.abs(fft) # normalized abs(FFT) between 0 and 1
-        length = numpy.float64(spectrum.shape[0])
-
-        # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
-        db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range
-
-        energy = spectrum.sum()
-        spectral_centroid = 0
-
-        if energy > 1e-60:
-            # calculate the spectral centroid
-
-            if self.spectrum_range == None:
-                self.spectrum_range = numpy.arange(length)
-
-            spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5
-
-            # clip > log10 > scale between 0 and 1
-            spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)
-
-        return (spectral_centroid, db_spectrum)
-
-
-    def peaks(self, start_seek, end_seek):
-        """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
-        in that range. Returns that pair in the order they were found. So if min was found first,
-        it returns (min, max) else the other way around. """
-
-        # larger blocksizes are faster but take more mem...
-        # Aha, Watson, a clue, a tradeof!
-        block_size = 4096
-
-        max_index = -1
-        max_value = -1
-        min_index = -1
-        min_value = 1
-
-        if start_seek < 0:
-            start_seek = 0
-
-        if end_seek > self.audio_file.nframes:
-            end_seek = self.audio_file.nframes
-
-        if end_seek <= start_seek:
-            samples = self.read(start_seek, 1)
-            return (samples[0], samples[0])
-
-        if block_size > end_seek - start_seek:
-            block_size = end_seek - start_seek
-
-        for i in range(start_seek, end_seek, block_size):
-            samples = self.read(i, block_size)
-
-            local_max_index = numpy.argmax(samples)
-            local_max_value = samples[local_max_index]
-
-            if local_max_value > max_value:
-                max_value = local_max_value
-                max_index = local_max_index
-
-            local_min_index = numpy.argmin(samples)
-            local_min_value = samples[local_min_index]
-
-            if local_min_value < min_value:
-                min_value = local_min_value
-                min_index = local_min_index
-
-        return (min_value, max_value) if min_index < max_index else (max_value, min_value)
-
-
-def interpolate_colors(colors, flat=False, num_colors=256):
-    """ given a list of colors, create a larger list of colors interpolating
-    the first one. If flatten is True a list of numers will be returned. If
-    False, a list of (r,g,b) tuples. num_colors is the number of colors wanted
-    in the final list """
-
-    palette = []
-
-    for i in range(num_colors):
-        index = (i * (len(colors) - 1))/(num_colors - 1.0)
-        index_int = int(index)
-        alpha = index - float(index_int)
-
-        if alpha > 0:
-            r = (1.0 - alpha) * colors[index_int][0] + alpha * colors[index_int + 1][0]
-            g = (1.0 - alpha) * colors[index_int][1] + alpha * colors[index_int + 1][1]
-            b = (1.0 - alpha) * colors[index_int][2] + alpha * colors[index_int + 1][2]
-        else:
-            r = (1.0 - alpha) * colors[index_int][0]
-            g = (1.0 - alpha) * colors[index_int][1]
-            b = (1.0 - alpha) * colors[index_int][2]
-
-        if flat:
-            palette.extend((int(r), int(g), int(b)))
-        else:
-            palette.append((int(r), int(g), int(b)))
-
-    return palette
-
-
-def desaturate(rgb, amount):
-    """
-        desaturate colors by amount
-        amount == 0, no change
-        amount == 1, grey
-    """
-    luminosity = sum(rgb) / 3.0
-    desat = lambda color: color - amount * (color - luminosity)
-
-    return tuple(map(int, map(desat, rgb)))
-
-
-class WaveformImage(object):
-    """
-    Given peaks and spectral centroids from the AudioProcessor, this class will construct
-    a wavefile image which can be saved as PNG.
-    """
-    def __init__(self, image_width, image_height, palette=1):
-        if image_height % 2 == 0:
-            raise AudioProcessingException("Height should be uneven: images look much better at uneven height")
-
-        if palette == 1:
-            background_color = (0,0,0)
-            colors = [
-                        (50,0,200),
-                        (0,220,80),
-                        (255,224,0),
-                        (255,70,0),
-                     ]
-        elif palette == 2:
-            background_color = (0,0,0)
-            colors = [self.color_from_value(value/29.0) for value in range(0,30)]
-        elif palette == 3:
-            background_color = (213, 217, 221)
-            colors = map( partial(desaturate, amount=0.7), [
-                        (50,0,200),
-                        (0,220,80),
-                        (255,224,0),
-                     ])
-        elif palette == 4:
-            background_color = (213, 217, 221)
-            colors = map( partial(desaturate, amount=0.8), [self.color_from_value(value/29.0) for value in range(0,30)])
-
-        self.image = Image.new("RGB", (image_width, image_height), background_color)
-
-        self.image_width = image_width
-        self.image_height = image_height
-
-        self.draw = ImageDraw.Draw(self.image)
-        self.previous_x, self.previous_y = None, None
-
-        self.color_lookup = interpolate_colors(colors)
-        self.pix = self.image.load()
-
-    def color_from_value(self, value):
-        """ given a value between 0 and 1, return an (r,g,b) tuple """
-
-        return ImageColor.getrgb("hsl(%d,%d%%,%d%%)" % (int( (1.0 - value) * 360 ), 80, 50))
-
-    def draw_peaks(self, x, peaks, spectral_centroid):
-        """ draw 2 peaks at x using the spectral_centroid for color """
-
-        y1 = self.image_height * 0.5 - peaks[0] * (self.image_height - 4) * 0.5
-        y2 = self.image_height * 0.5 - peaks[1] * (self.image_height - 4) * 0.5
-
-        line_color = self.color_lookup[int(spectral_centroid*255.0)]
-
-        if self.previous_y != None:
-            self.draw.line([self.previous_x, self.previous_y, x, y1, x, y2], line_color)
-        else:
-            self.draw.line([x, y1, x, y2], line_color)
-
-        self.previous_x, self.previous_y = x, y2
-
-        self.draw_anti_aliased_pixels(x, y1, y2, line_color)
-
-    def draw_anti_aliased_pixels(self, x, y1, y2, color):
-        """ vertical anti-aliasing at y1 and y2 """
-
-        y_max = max(y1, y2)
-        y_max_int = int(y_max)
-        alpha = y_max - y_max_int
-
-        if alpha > 0.0 and alpha < 1.0 and y_max_int + 1 < self.image_height:
-            current_pix = self.pix[x, y_max_int + 1]
-
-            r = int((1-alpha)*current_pix[0] + alpha*color[0])
-            g = int((1-alpha)*current_pix[1] + alpha*color[1])
-            b = int((1-alpha)*current_pix[2] + alpha*color[2])
-
-            self.pix[x, y_max_int + 1] = (r,g,b)
-
-        y_min = min(y1, y2)
-        y_min_int = int(y_min)
-        alpha = 1.0 - (y_min - y_min_int)
-
-        if alpha > 0.0 and alpha < 1.0 and y_min_int - 1 >= 0:
-            current_pix = self.pix[x, y_min_int - 1]
-
-            r = int((1-alpha)*current_pix[0] + alpha*color[0])
-            g = int((1-alpha)*current_pix[1] + alpha*color[1])
-            b = int((1-alpha)*current_pix[2] + alpha*color[2])
-
-            self.pix[x, y_min_int - 1] = (r,g,b)
-
-    def save(self, filename):
-        # draw a zero "zero" line
-        a = 25
-        for x in range(self.image_width):
-            self.pix[x, self.image_height/2] = tuple(map(lambda p: p+a, self.pix[x, self.image_height/2]))
-
-        self.image.save(filename)
-
-
-class SpectrogramImage(object):
-    """
-    Given spectra from the AudioProcessor, this class will construct a wavefile image which
-    can be saved as PNG.
-    """
-    def __init__(self, image_width, image_height, fft_size):
-        self.image_width = image_width
-        self.image_height = image_height
-        self.fft_size = fft_size
-
-        self.image = Image.new("RGBA", (image_height, image_width))
-
-        colors = [
-            (0, 0, 0, 0),
-            (58/4, 68/4, 65/4, 255),
-            (80/2, 100/2, 153/2, 255),
-            (90, 180, 100, 255),
-            (224, 224, 44, 255),
-            (255, 60, 30, 255),
-            (255, 255, 255, 255)
-         ]
-        self.palette = interpolate_colors(colors)
-
-        # generate the lookup which translates y-coordinate to fft-bin
-        self.y_to_bin = []
-        f_min = 100.0
-        f_max = 22050.0
-        y_min = math.log10(f_min)
-        y_max = math.log10(f_max)
-        for y in range(self.image_height):
-            freq = math.pow(10.0, y_min + y / (image_height - 1.0) *(y_max - y_min))
-            bin = freq / 22050.0 * (self.fft_size/2 + 1)
-
-            if bin < self.fft_size/2:
-                alpha = bin - int(bin)
-
-                self.y_to_bin.append((int(bin), alpha * 255))
-
-        # this is a bit strange, but using image.load()[x,y] = ... is
-        # a lot slower than using image.putadata and then rotating the image
-        # so we store all the pixels in an array and then create the image when saving
-        self.pixels = []
-
-    def draw_spectrum(self, x, spectrum):
-        # for all frequencies, draw the pixels
-        for (index, alpha) in self.y_to_bin:
-            self.pixels.append( self.palette[int((255.0-alpha) * spectrum[index] + alpha * spectrum[index + 1])] )
-
-        # if the FFT is too small to fill up the image, fill with black to the top
-        for y in range(len(self.y_to_bin), self.image_height): #@UnusedVariable
-            self.pixels.append(self.palette[0])
-
-    def save(self, filename, quality=80):
-        assert filename.lower().endswith(".jpg")
-        self.image.putdata(self.pixels)
-        self.image.transpose(Image.ROTATE_90).save(filename, quality=quality)
-
-
-def create_wave_images(input_filename, output_filename_w, output_filename_s, image_width, image_height, fft_size, progress_callback=None):
-    """
-    Utility function for creating both wavefile and spectrum images from an audio input file.
-    """
-    processor = AudioProcessor(input_filename, fft_size, numpy.hanning)
-    samples_per_pixel = processor.audio_file.nframes / float(image_width)
-
-    waveform = WaveformImage(image_width, image_height)
-    spectrogram = SpectrogramImage(image_width, image_height, fft_size)
-
-    for x in range(image_width):
-
-        if progress_callback and x % (image_width/10) == 0:
-            progress_callback((x*100)/image_width)
-
-        seek_point = int(x * samples_per_pixel)
-        next_seek_point = int((x + 1) * samples_per_pixel)
-
-        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
-        peaks = processor.peaks(seek_point, next_seek_point)
-
-        waveform.draw_peaks(x, peaks, spectral_centroid)
-        spectrogram.draw_spectrum(x, db_spectrum)
-
-    if progress_callback:
-        progress_callback(100)
-
-    waveform.save(output_filename_w)
-    spectrogram.save(output_filename_s)
-
-
-class NoSpaceLeftException(Exception):
-    pass
-
-def convert_to_pcm(input_filename, output_filename):
-    """
-    converts any audio file type to pcm audio
-    """
-
-    if not os.path.exists(input_filename):
-        raise AudioProcessingException("file %s does not exist" % input_filename)
-
-    sound_type = get_sound_type(input_filename)
-
-    if sound_type == "mp3":
-        cmd = ["lame", "--decode", input_filename, output_filename]
-    elif sound_type == "ogg":
-        cmd = ["oggdec", input_filename, "-o", output_filename]
-    elif sound_type == "flac":
-        cmd = ["flac", "-f", "-d", "-s", "-o", output_filename, input_filename]
-    else:
-        return False
-
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    (stdout, stderr) = process.communicate()
-
-    if process.returncode != 0 or not os.path.exists(output_filename):
-        if "No space left on device" in stderr + " " + stdout:
-            raise NoSpaceLeftException
-        raise AudioProcessingException("failed converting to pcm data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)
-
-    return True
-
-
-def stereofy_and_find_info(stereofy_executble_path, input_filename, output_filename):
-    """
-    converts a pcm wave file to two channel, 16 bit integer
-    """
-
-    if not os.path.exists(input_filename):
-        raise AudioProcessingException("file %s does not exist" % input_filename)
-
-    cmd = [stereofy_executble_path, "--input", input_filename, "--output", output_filename]
-
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    (stdout, stderr) = process.communicate()
-
-    if process.returncode != 0 or not os.path.exists(output_filename):
-        if "No space left on device" in stderr + " " + stdout:
-            raise NoSpaceLeftException
-        raise AudioProcessingException("failed calling stereofy data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)
-
-    stdout = (stdout + " " + stderr).replace("\n", " ")
-
-    duration = 0
-    m = re.match(r".*#duration (?P<duration>[\d\.]+).*",  stdout)
-    if m != None:
-        duration = float(m.group("duration"))
-
-    channels = 0
-    m = re.match(r".*#channels (?P<channels>\d+).*", stdout)
-    if m != None:
-        channels = float(m.group("channels"))
-
-    samplerate = 0
-    m = re.match(r".*#samplerate (?P<samplerate>\d+).*", stdout)
-    if m != None:
-        samplerate = float(m.group("samplerate"))
-
-    bitdepth = None
-    m = re.match(r".*#bitdepth (?P<bitdepth>\d+).*", stdout)
-    if m != None:
-        bitdepth = float(m.group("bitdepth"))
-
-    bitrate = (os.path.getsize(input_filename) * 8.0) / 1024.0 / duration if duration > 0 else 0
-
-    return dict(duration=duration, channels=channels, samplerate=samplerate, bitrate=bitrate, bitdepth=bitdepth)
-
-
-def convert_to_mp3(input_filename, output_filename, quality=70):
-    """
-    converts the incoming wave file to a mp3 file
-    """
-
-    if not os.path.exists(input_filename):
-        raise AudioProcessingException("file %s does not exist" % input_filename)
-
-    command = ["lame", "--silent", "--abr", str(quality), input_filename, output_filename]
-
-    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    (stdout, stderr) = process.communicate()
-
-    if process.returncode != 0 or not os.path.exists(output_filename):
-        raise AudioProcessingException(stdout)
-
-def convert_to_ogg(input_filename, output_filename, quality=1):
-    """
-    converts the incoming wave file to n ogg file
-    """
-
-    if not os.path.exists(input_filename):
-        raise AudioProcessingException("file %s does not exist" % input_filename)
-
-    command = ["oggenc", "-q", str(quality), input_filename, "-o", output_filename]
-
-    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    (stdout, stderr) = process.communicate()
-
-    if process.returncode != 0 or not os.path.exists(output_filename):
-        raise AudioProcessingException(stdout)
-
-def convert_using_ffmpeg(input_filename, output_filename):
-    """
-    converts the incoming wave file to stereo pcm using fffmpeg
-    """
-    TIMEOUT = 3 * 60
-    def  alarm_handler(signum, frame):
-        raise AudioProcessingException("timeout while waiting for ffmpeg")
-
-    if not os.path.exists(input_filename):
-        raise AudioProcessingException("file %s does not exist" % input_filename)
-
-    command = ["ffmpeg", "-y", "-i", input_filename, "-ac","1","-acodec", "pcm_s16le", "-ar", "44100", output_filename]
-
-    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    signal.signal(signal.SIGALRM,alarm_handler)
-    signal.alarm(TIMEOUT)
-    (stdout, stderr) = process.communicate()
-    signal.alarm(0)
-    if process.returncode != 0 or not os.path.exists(output_filename):
-        raise AudioProcessingException(stdout)
--- a/mediagoblin/media_types/audio/audioprocessing.py
+++ b/mediagoblin/media_types/audio/audioprocessing.py
@@ -1 +0,0 @@
-../../../extlib/freesound/audioprocessing.py
--- a/mediagoblin/media_types/audio/audiotospectrogram.py
+++ b/mediagoblin/media_types/audio/audiotospectrogram.py
@@ -0,0 +1,297 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from PIL import Image
+import soundfile
+import numpy
+
+# Frequency range shown in the spectrogram (presumably Hz -- TODO confirm
+# against the code that consumes these values).
+SPECTROGRAM_MAX_FREQUENCY = 8000 # Old spectrogram.py sets the upper limit to 22050, but
+                                 # usually there isn't much detail in higher frequencies
+SPECTROGRAM_MIN_FREQUENCY = 20
+# Presumably the dynamic range in dB mapped onto the color palette -- TODO confirm
+SPECTROGRAM_DB_RANGE      = 110
+# Color palette copied from old spectrogram.py.
+# Each entry is an (r, g, b) tuple. Under Python 3 "/" is true division, so
+# the first two entries hold float components (e.g. 58 / 4 == 14.5).
+SPECTROGRAM_COLORS = [(58 / 4, 68 / 4, 65 / 4),
+                      (80 / 2, 100 / 2, 153 / 2),
+                      (90, 180, 100),
+                      (224, 224, 44),
+                      (255, 60, 30),
+                      (255, 255, 255)]
+# The purpose of this table is to give more horizontal
+# real estate to shorter sound files.
+# Format: (pixels, (range_min, range_max))
+# For sounds with a duration >= _range_min_ and < _range_max_ (in seconds),
+# give _pixels_ horizontal pixels for each second of audio.
+SPECTROGRAM_WIDTH_PERSECOND = [(240, (  0,     20)),
+                               (120, ( 20,     30)),
+                               ( 60, ( 30,     60)),
+                               ( 30, ( 60,    120)),
+                               ( 15, (120,    240)),
+                               (  6, (240, 100000))] # Upper limit is arbitrary. Sounds with a longer
+                                                     # duration will still get assigned to the last bucket
+# Presumably the height of the generated image in pixels -- TODO confirm
+SPECTROGRAM_HEIGHT = 500
+
+class AudioBlocksFFT:
+    """
+    Read an audio file block by block and compute FFT amplitudes per block
+
+    Iterating over an instance yields one (fftAmplitude, seconds) tuple per
+    block of blockSize frames:
+
+    - fftAmplitude: magnitudes of the real FFT of the windowed mono mix-down,
+      restricted to the bins between minFreq and maxFreq and, when numBins is
+      given, resized to exactly numBins values.
+    - seconds: read position of the file after the block, divided by the
+      sample rate.
+
+    NOTE: the SoundFile opened in __init__ is kept in self.audioData and is
+    never closed by this class.
+    """
+
+    def __init__(self, fileName, blockSize, overlap, minFreq, maxFreq, numBins = None, windowFunction = numpy.hanning):
+        """
+        Open the audio file for reading and store the analysis settings
+
+        fileName:       passed to soundfile.SoundFile
+        blockSize:      number of frames per block (also the window length)
+        overlap:        passed to SoundFile.blocks() as the block overlap
+        minFreq:        lowest frequency to keep (same unit as the sample rate)
+        maxFreq:        highest frequency to keep (same unit as the sample rate)
+        numBins:        if not None, every yielded array is resized to this length
+        windowFunction: called once with blockSize; the result is multiplied
+                        with every block before the FFT
+        """
+        self.audioData = soundfile.SoundFile(fileName, 'r')
+        self.numChannels = self.audioData.channels
+        self.sampleRate = self.audioData.samplerate
+        self.minFreq = minFreq
+        self.maxFreq = maxFreq
+        self.blockSize = blockSize
+        self.numBins = numBins
+        self.overlap = overlap
+        self.windowValues = windowFunction(blockSize)
+        # Running maximum, updated while iterating over the blocks
+        self.peakFFTValue = 0
+        try:
+            # PySoundFile V0.10.0 adds SoundFile.frames property and deprecates __len__()
+            self.totalSamples = self.audioData.frames
+        except AttributeError:
+            self.totalSamples = len(self.audioData)
+
+    def peakFFTAmplitude(self):
+        """
+        Peak amplitude of FFT over the blocks iterated so far
+
+        The value is 0 until the instance has been iterated.
+        """
+        return self.peakFFTValue
+
+    def totalSeconds(self):
+        """
+        Total length in seconds (total frames divided by sample rate)
+        """
+        return self.totalSamples / self.sampleRate
+
+    def _filterFreqRange(self, fftAmplitude):
+        """
+        Given a FFT amplitudes array keep only bins between minFreq, maxFreq
+
+        The array is taken to span 0 .. sampleRate // 2 in equally wide bins.
+        If the bin of maxFreq lies beyond the end of the array, the array is
+        padded with zeros first, so the result always has
+        (endIdx + 1 - startIdx) entries.
+        """
+        nyquistFreq = self.sampleRate // 2
+        numBins = len(fftAmplitude)
+        sliceWidth = nyquistFreq / numBins
+        startIdx = int(self.minFreq / sliceWidth)
+        endIdx = int(self.maxFreq / sliceWidth)
+        if numBins <= endIdx:
+            # Requested range exceeds the available bins: extend with silence
+            fftAmplitude = numpy.pad(fftAmplitude, (0, 1 + endIdx - numBins), 'constant', constant_values=(0))
+        else:
+            fftAmplitude = fftAmplitude[:endIdx + 1]
+        return fftAmplitude[startIdx:]
+
+    def _resizeAmplitudeArray(self, amplitudeValues, newSize):
+        """
+        Resize amplitude values array to newSize entries
+
+        The input is returned unchanged when it already has newSize entries.
+        Growing repeats source values (nearest lower index); shrinking keeps
+        the maximum of each group of source values so peaks are not lost.
+        """
+        oldSize = len(amplitudeValues)
+        if oldSize == newSize:
+            return amplitudeValues
+        result = numpy.zeros(newSize)
+        if newSize > oldSize:
+            # Resize up
+            for idx in range(newSize):
+                result[idx] = amplitudeValues[(idx * oldSize) // newSize]
+            return result
+        # Resize down keeping peaks
+        for idx, chunk in enumerate(numpy.array_split(amplitudeValues, newSize)):
+            result[idx] = chunk.max()
+        return result
+
+    def __iter__(self):
+        """
+        Read the file block by block and yield (fftAmplitude, seconds) tuples
+
+        Reading always restarts from the beginning of the file. As a side
+        effect the peak FFT amplitude (see peakFFTAmplitude) is updated.
+        """
+        self.audioData.seek(0)
+        # always_2d makes mono files come back as (frames, 1) arrays too;
+        # without it they are 1-D and indexing by channel raises IndexError.
+        blocks = self.audioData.blocks(blocksize = self.blockSize, overlap = self.overlap, always_2d = True)
+        for fileBlock in blocks:
+            # Mix down all channels to mono by summing them
+            audioBlock = fileBlock.sum(axis = 1)
+            # On the last block it may be necessary to pad with zeros
+            if len(audioBlock) < self.blockSize:
+                audioBlock = numpy.pad(audioBlock, (0, self.blockSize - len(audioBlock)), 'constant', constant_values=(0))
+            # Compute FFT amplitude of this block
+            fftAmplitude = self._filterFreqRange(numpy.abs(numpy.fft.rfft(audioBlock * self.windowValues)))
+            self.peakFFTValue = max(self.peakFFTValue, fftAmplitude.max())
+            # Resize if requested
+            if self.numBins is not None:
+                fftAmplitude = self._resizeAmplitudeArray(fftAmplitude, self.numBins)
+            yield (fftAmplitude, self.audioData.tell() / self.sampleRate)
+
+class SpectrogramColorMap:
+    """
+    Translate spectrogram amplitude columns into a flat list of RGB pixels
+
+    columnData is indexed as columnData[x][y]: one entry per image column,
+    each holding one amplitude value per image row.
+    """
+
+    def __init__(self, columnData):
+        """
+        Store the column data, derive image size and build the palette
+        """
+        self.columnData = columnData
+        self.width = len(columnData)
+        self.height = len(columnData[0])
+        self._buildColorPalette()
+
+    def _colorBetween(self, beginColor, endColor, step):
+        """
+        Blend two (r, g, b) colors; step 0 gives beginColor, 1 gives endColor
+
+        The squares of the components are interpolated linearly and the
+        square root of the result is truncated to an int no larger than 255.
+        """
+        def blend(first, last):
+            mixed = int(numpy.sqrt((1.0 - step) * (first * first) + step * (last * last)))
+            return min(mixed, 255)
+
+        rS, gS, bS = beginColor
+        rE, gE, bE = endColor
+        return (blend(rS, rE), blend(gS, gE), blend(bS, bE))
+
+    def _buildColorPalette(self):
+        """
+        Fill self.colors with 200 shades per pair of neighbouring
+        SPECTROGRAM_COLORS entries
+        """
+        colorPoints = SPECTROGRAM_COLORS
+        stepsPerSegment = 200
+        self.colors = [self._colorBetween(first, last, p / stepsPerSegment)
+                       for first, last in zip(colorPoints, colorPoints[1:])
+                       for p in range(stepsPerSegment)]
+
+    def getColorData(self, progressCallback = None):
+        """
+        Return the palette color of every pixel as a flat, row-major list
+
+        Rows are flipped, so the last value of each column ends up in the
+        top image row. Amplitudes are scaled by the palette size and clamped
+        to a valid palette index. If given, progressCallback is called after
+        each column with 100 * x / width.
+        """
+        palette = self.colors
+        lastColor = len(palette) - 1
+        pixels = [palette[0]] * (self.width * self.height)
+        for x in range(self.width):
+            column = self.columnData[x]
+            for y in range(self.height):
+                amplitudeVal = column[self.height - y - 1]
+                colorIdx = min(max(int(len(palette) * amplitudeVal), 0), lastColor)
+                pixels[x + self.width * y] = palette[colorIdx]
+            if progressCallback:
+                progressCallback(100 * x / self.width)
+        return pixels
+
+def drawSpectrogram(audioFileName, imageFileName, fftSize = 1024, fftOverlap = 0, progressCallback = None):
+    """
+    Draw a spectrogram of the audio file and save it as an image
+
+    audioFileName    -- audio file handed to AudioBlocksFFT
+    imageFileName    -- destination handed to Image.save()
+    fftSize          -- FFT block size handed to AudioBlocksFFT
+    fftOverlap       -- block overlap handed to AudioBlocksFFT
+    progressCallback -- optional callable receiving an integer percentage;
+                        intermediate calls are throttled to steps of
+                        PERCENTAGE_REPORT_STEP and a final call with 100 is
+                        made once the image has been saved
+    """
+
+    # Fraction of total work for each step
+    STEP_PERCENTAGE_FFT        = 40
+    STEP_PERCENTAGE_NORMALIZE  = 5
+    STEP_PERCENTAGE_ACCUMULATE = 10
+    STEP_PERCENTAGE_DRAW       = 40
+    # Give last 5% to saving the file
+
+    PERCENTAGE_REPORT_STEP = 2
+
+    nextReportedPercentage = PERCENTAGE_REPORT_STEP
+    def wrapProgressCallback(percentage):
+        # Only forward a percentage once it reaches the next reporting step
+        nonlocal nextReportedPercentage
+        percentage = int(percentage)
+        if percentage >= nextReportedPercentage:
+            if progressCallback:
+                progressCallback(percentage)
+            nextReportedPercentage = (1 + percentage // PERCENTAGE_REPORT_STEP) * PERCENTAGE_REPORT_STEP
+
+    def mapColorsProgressCallback(percentage):
+        # Rescale the color mapper's 0-100 range into the slice of the draw step
+        wrapProgressCallback(STEP_PERCENTAGE_FFT + STEP_PERCENTAGE_NORMALIZE + STEP_PERCENTAGE_ACCUMULATE
+                             + (STEP_PERCENTAGE_DRAW * (percentage / 100)))
+
+    imageWidthLookup = SPECTROGRAM_WIDTH_PERSECOND
+    imageHeight = SPECTROGRAM_HEIGHT
+
+    # Load audio file and compute FFT amplitudes
+    fftBlocksSource = AudioBlocksFFT(audioFileName,
+                                     fftSize, overlap = fftOverlap,
+                                     minFreq = SPECTROGRAM_MIN_FREQUENCY, maxFreq = SPECTROGRAM_MAX_FREQUENCY,
+                                     numBins = imageHeight)
+    soundLength = fftBlocksSource.totalSeconds()
+    fftAmplitudeBlocks = []
+    for fftAmplitude, positionSeconds in fftBlocksSource:
+        fftAmplitudeBlocks.append(fftAmplitude)
+        # A zero reported duration must not turn progress reporting into a crash
+        if soundLength > 0:
+            wrapProgressCallback(STEP_PERCENTAGE_FFT * (positionSeconds / soundLength))
+
+    totalProgress = STEP_PERCENTAGE_FFT
+
+    # Normalize FFT amplitude and convert to log scale
+    specRange = SPECTROGRAM_DB_RANGE
+    numBlocks = len(fftAmplitudeBlocks)
+    # The peak is only read after every block has been consumed above.
+    # Silent input gives a peak of zero: dividing by it would yield NaN
+    # amplitudes, which cannot be converted to a color index. Any positive
+    # divisor maps an all-zero block to the bottom of the range instead.
+    # NOTE(review): assumes peakFFTAmplitude() is a plain getter - confirm
+    peakAmplitude = fftBlocksSource.peakFFTAmplitude()
+    if not peakAmplitude > 0:
+        peakAmplitude = 1.0
+    for i, amplitudes in enumerate(fftAmplitudeBlocks):
+        normalized = numpy.divide(amplitudes, peakAmplitude)
+        # The tiny offset keeps log10 finite for bins that are exactly zero
+        fftAmplitudeBlocks[i] = ((20*(numpy.log10(normalized + 1e-60))).clip(-specRange, 0.0) + specRange)/specRange
+        wrapProgressCallback(totalProgress + STEP_PERCENTAGE_NORMALIZE * (i / numBlocks))
+
+    totalProgress = totalProgress + STEP_PERCENTAGE_NORMALIZE
+
+    # Compute spectrogram width in pixels; fall back to the last lookup entry
+    imageWidthPerSecond, _ = imageWidthLookup[-1]
+    for widthPerSecond, lengthLimit in imageWidthLookup:
+        limitLow, limitHigh = lengthLimit
+        if limitLow < soundLength <= limitHigh:
+            imageWidthPerSecond = widthPerSecond
+            break
+    imageWidth = int(imageWidthPerSecond * soundLength)
+
+    # Compute spectrogram values: every image column keeps the per-bin
+    # maximum of all FFT blocks that fall onto it
+    columnValues = numpy.zeros(imageHeight)
+    spectrogram = []
+    x = 0
+    for idx, amplitudes in enumerate(fftAmplitudeBlocks):
+        newX = (idx * imageWidth) // numBlocks
+        if newX != x:
+            # Save column
+            spectrogram.append(numpy.copy(columnValues))
+            x = newX
+            columnValues.fill(0)
+        columnValues = numpy.maximum(columnValues, amplitudes)
+        wrapProgressCallback(totalProgress + STEP_PERCENTAGE_ACCUMULATE * (idx / numBlocks))
+    spectrogram.append(numpy.copy(columnValues))
+
+    # Draw spectrogram; the real width is the number of columns collected
+    imageWidth = len(spectrogram)
+    colorData = SpectrogramColorMap(spectrogram).getColorData(progressCallback = mapColorsProgressCallback)
+
+    # Save final image
+    image = Image.new('RGB', (imageWidth, imageHeight))
+    image.putdata(colorData)
+    image.save(imageFileName)
+
+    if progressCallback:
+        progressCallback(100)
+
+
+if __name__ == "__main__":
+    # Command line entry point: <script> input_file [output_file]
+
+    import sys
+
+    def printProgress(p):
+        # Rewrite the same terminal line on every progress update
+        sys.stdout.write("\rProgress : {}%".format(p))
+        sys.stdout.flush()
+
+    if len(sys.argv) not in (2, 3):
+        print("Usage:\n{0} input_file [output_file]".format(sys.argv[0]))
+        # sys.exit() instead of the interactive-only exit() helper; a
+        # non-zero status tells calling scripts that nothing was produced
+        sys.exit(1)
+
+    audioFile = sys.argv[1]
+
+    # The output file is optional and defaults to the working directory
+    if len(sys.argv) == 3:
+        outputFile = sys.argv[2]
+    else:
+        outputFile = 'spectrogram.png'
+
+    sys.stdout.write("Input    : {0}\nOutput   : {1}\n".format(audioFile, outputFile))
+    drawSpectrogram(audioFile, outputFile, progressCallback = printProgress)
+    sys.stdout.write("\nDone!\n")
+    sys.stdout.write("\nDone!\n")
--- a/mediagoblin/media_types/audio/spectrogram.py
+++ b/mediagoblin/media_types/audio/spectrogram.py
@@ -1,362 +0,0 @@
-# processing.py -- various audio processing functions
-# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
-#                    UNIVERSITAT POMPEU FABRA
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-# Authors:
-#   Bram de Jong <bram.dejong at domain.com where domain in gmail>
-#   2012, Joar Wandborg <first name at last name dot se>
-
-from __future__ import print_function
-
-try:
-    from PIL import Image
-except ImportError:
-    import Image
-import math
-import numpy
-
-try:
-    import scikits.audiolab as audiolab
-except ImportError:
-    print("WARNING: audiolab is not installed so wav2png will not work")
-
-
-class AudioProcessingException(Exception):
-    pass
-
-
-class SpectrogramImage(object):
-    def __init__(self, image_size, fft_size):
-        self.image_width, self.image_height = image_size
-        self.fft_size = fft_size
-
-        colors = [
-            (0, 0, 0, 0),
-            (58 / 4, 68 / 4, 65 / 4, 255),
-            (80 / 2, 100 / 2, 153 / 2, 255),
-            (90, 180, 100, 255),
-            (224, 224, 44, 255),
-            (255, 60, 30, 255),
-            (255, 255, 255, 255)
-         ]
-
-        self.palette = interpolate_colors(colors)
-
-        # Generate lookup table for y-coordinate from fft-bin
-        self.y_to_bin = []
-
-        fft_min = 100.0
-        fft_max = 22050.0  # kHz?
-
-        y_min = math.log10(fft_min)
-        y_max = math.log10(fft_max)
-
-        for y in range(self.image_height):
-            freq = math.pow(
-                    10.0,
-                    y_min + y / (self.image_height - 1.0)
-                    * (y_max - y_min))
-
-            fft_bin = freq / fft_max * (self.fft_size / 2 + 1)
-
-            if fft_bin < self.fft_size / 2:
-                alpha = fft_bin - int(fft_bin)
-
-                self.y_to_bin.append((int(fft_bin), alpha * 255))
-
-        # this is a bit strange, but using image.load()[x,y] = ... is
-        # a lot slower than using image.putadata and then rotating the image
-        # so we store all the pixels in an array and then create the image when saving
-        self.pixels = []
-
-    def draw_spectrum(self, x, spectrum):
-        # for all frequencies, draw the pixels
-        for index, alpha in self.y_to_bin:
-            self.pixels.append(
-                    self.palette[int((255.0 - alpha) * spectrum[index]
-                        + alpha * spectrum[index + 1])])
-
-        # if the FFT is too small to fill up the image, fill with black to the top
-        for y in range(len(self.y_to_bin), self.image_height):
-            self.pixels.append(self.palette[0])
-
-    def save(self, filename, quality=90):
-        self.image = Image.new(
-                'RGBA',
-                (self.image_height, self.image_width))
-
-        self.image.putdata(self.pixels)
-        self.image.transpose(Image.ROTATE_90).save(
-                filename,
-                quality=quality)
-
-
-class AudioProcessor(object):
-    """
-    The audio processor processes chunks of audio an calculates the spectrac centroid and the peak
-    samples in that chunk of audio.
-    """
-    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
-        max_level = get_max_level(input_filename)
-
-        self.audio_file = audiolab.Sndfile(input_filename, 'r')
-        self.fft_size = fft_size
-        self.window = window_function(self.fft_size)
-        self.spectrum_range = None
-        self.lower = 100
-        self.higher = 22050
-        self.lower_log = math.log10(self.lower)
-        self.higher_log = math.log10(self.higher)
-        self.clip = lambda val, low, high: min(high, max(low, val))
-
-        # figure out what the maximum value is for an FFT doing the FFT of a DC signal
-        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
-        max_fft = (numpy.abs(fft)).max()
-
-        # set the scale to normalized audio and normalized FFT
-        self.scale = 1.0 / max_level / max_fft if max_level > 0 else 1
-
-    def read(self, start, size, resize_if_less=False):
-        """ read size samples starting at start, if resize_if_less is True and less than size
-        samples are read, resize the array to size and fill with zeros """
-
-        # number of zeros to add to start and end of the buffer
-        add_to_start = 0
-        add_to_end = 0
-
-        if start < 0:
-            # the first FFT window starts centered around zero
-            if size + start <= 0:
-                return numpy.zeros(size) if resize_if_less else numpy.array([])
-            else:
-                self.audio_file.seek(0)
-
-                add_to_start = - start  # remember: start is negative!
-                to_read = size + start
-
-                if to_read > self.audio_file.nframes:
-                    add_to_end = to_read - self.audio_file.nframes
-                    to_read = self.audio_file.nframes
-        else:
-            self.audio_file.seek(start)
-
-            to_read = size
-            if start + to_read >= self.audio_file.nframes:
-                to_read = self.audio_file.nframes - start
-                add_to_end = size - to_read
-
-        try:
-            samples = self.audio_file.read_frames(to_read)
-        except RuntimeError:
-            # this can happen for wave files with broken headers...
-            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)
-
-        # convert to mono by selecting left channel only
-        if self.audio_file.channels > 1:
-            samples = samples[:,0]
-
-        if resize_if_less and (add_to_start > 0 or add_to_end > 0):
-            if add_to_start > 0:
-                samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1)
-
-            if add_to_end > 0:
-                samples = numpy.resize(samples, size)
-                samples[size - add_to_end:] = 0
-
-        return samples
-
-    def spectral_centroid(self, seek_point, spec_range=110.0):
-        """ starting at seek_point read fft_size samples, and calculate the spectral centroid """
-
-        samples = self.read(seek_point - self.fft_size/2, self.fft_size, True)
-
-        samples *= self.window
-        fft = numpy.fft.rfft(samples)
-        spectrum = self.scale * numpy.abs(fft)  # normalized abs(FFT) between 0 and 1
-
-        length = numpy.float64(spectrum.shape[0])
-
-        # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
-        db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range
-
-        energy = spectrum.sum()
-        spectral_centroid = 0
-
-        if energy > 1e-60:
-            # calculate the spectral centroid
-
-            if self.spectrum_range == None:
-                self.spectrum_range = numpy.arange(length)
-
-            spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5
-
-            # clip > log10 > scale between 0 and 1
-            spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)
-
-        return (spectral_centroid, db_spectrum)
-
-
-    def peaks(self, start_seek, end_seek):
-        """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
-        in that range. Returns that pair in the order they were found. So if min was found first,
-        it returns (min, max) else the other way around. """
-
-        # larger blocksizes are faster but take more mem...
-        # Aha, Watson, a clue, a tradeof!
-        block_size = 4096
-
-        max_index = -1
-        max_value = -1
-        min_index = -1
-        min_value = 1
-
-        if start_seek < 0:
-            start_seek = 0
-
-        if end_seek > self.audio_file.nframes:
-            end_seek = self.audio_file.nframes
-
-        if end_seek <= start_seek:
-            samples = self.read(start_seek, 1)
-            return (samples[0], samples[0])
-
-        if block_size > end_seek - start_seek:
-            block_size = end_seek - start_seek
-
-        for i in range(start_seek, end_seek, block_size):
-            samples = self.read(i, block_size)
-
-            local_max_index = numpy.argmax(samples)
-            local_max_value = samples[local_max_index]
-
-            if local_max_value > max_value:
-                max_value = local_max_value
-                max_index = local_max_index
-
-            local_min_index = numpy.argmin(samples)
-            local_min_value = samples[local_min_index]
-
-            if local_min_value < min_value:
-                min_value = local_min_value
-                min_index = local_min_index
-
-        return (min_value, max_value) if min_index < max_index else (max_value, min_value)
-
-
-def create_spectrogram_image(source_filename, output_filename,
-        image_size, fft_size, progress_callback=None):
-
-    processor = AudioProcessor(source_filename, fft_size, numpy.hamming)
-    samples_per_pixel = processor.audio_file.nframes / float(image_size[0])
-
-    spectrogram = SpectrogramImage(image_size, fft_size)
-
-    for x in range(image_size[0]):
-        if progress_callback and x % (image_size[0] / 10) == 0:
-            progress_callback((x * 100) / image_size[0])
-
-        seek_point = int(x * samples_per_pixel)
-        next_seek_point = int((x + 1) * samples_per_pixel)
-
-        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
-
-        spectrogram.draw_spectrum(x, db_spectrum)
-
-    if progress_callback:
-        progress_callback(100)
-
-    spectrogram.save(output_filename)
-
-
-def interpolate_colors(colors, flat=False, num_colors=256):
-
-    palette = []
-
-    for i in range(num_colors):
-        # TODO: What does this do?
-        index = (
-                (i *
-                    (len(colors) - 1)  # 7
-                )  # 0..7..14..21..28...
-            /
-                (num_colors - 1.0)  # 255.0
-            )
-
-        # TODO: What is the meaning of 'alpha' in this context?
-        alpha = index - round(index)
-
-        channels = list('rgb')
-        values = dict()
-
-        for k, v in zip(range(len(channels)), channels):
-            if alpha > 0:
-                values[v] = (
-                        (1.0 - alpha)
-                    *
-                        colors[int(index)][k]
-                    +
-                        alpha * colors[int(index) + 1][k]
-                    )
-            else:
-                values[v] = (
-                        (1.0 - alpha)
-                    *
-                        colors[int(index)][k]
-                    )
-
-        if flat:
-            palette.extend(
-                tuple(int(values[i]) for i in channels))
-        else:
-            palette.append(
-                tuple(int(values[i]) for i in channels))
-
-    return palette
-
-
-def get_max_level(filename):
-    max_value = 0
-    buffer_size = 4096
-    audio_file = audiolab.Sndfile(filename, 'r')
-    n_samples_left = audio_file.nframes
-
-    while n_samples_left:
-        to_read = min(buffer_size, n_samples_left)
-
-        try:
-            samples = audio_file.read_frames(to_read)
-        except RuntimeError:
-            # this can happen with a broken header
-            break
-
-        # convert to mono by selecting left channel only
-        if audio_file.channels > 1:
-            samples = samples[:,0]
-
-        max_value = max(max_value, numpy.abs(samples).max())
-
-        n_samples_left -= to_read
-
-    audio_file.close()
-
-    return max_value
-
-if __name__ == '__main__':
-    import sys
-    sys.argv[4] = int(sys.argv[4])
-    sys.argv[3] = tuple([int(i) for i in sys.argv[3].split('x')])
-
-    create_spectrogram_image(*sys.argv[1:])
--- a/mediagoblin/media_types/audio/transcoders.py
+++ b/mediagoblin/media_types/audio/transcoders.py
@@ -43,45 +43,15 @@ gi.require_version('Gst', '1.0')
 from gi.repository import GObject, Gst
 Gst.init(None)

-
-# TODO: Now unused - remove.
-class Python2AudioThumbnailer(object):
+class Python3AudioThumbnailer(object):
    def __init__(self):
        _log.info('Initializing {0}'.format(self.__class__.__name__))

    def spectrogram(self, src, dst, **kw):
-        import numpy
-        # This third-party bundled module is Python 2-only.
-        from mediagoblin.media_types.audio import audioprocessing
-
-        width = kw['width']
-        height = int(kw.get('height', float(width) * 0.3))
-        fft_size = kw.get('fft_size', 2048)
+        from mediagoblin.media_types.audio import audiotospectrogram
+        fft_size = kw.get('fft_size', 1024)
        callback = kw.get('progress_callback')
-        processor = audioprocessing.AudioProcessor(
-            src,
-            fft_size,
-            numpy.hanning)
-
-        samples_per_pixel = processor.audio_file.nframes / float(width)
-
-        spectrogram = audioprocessing.SpectrogramImage(width, height, fft_size)
-
-        for x in range(width):
-            if callback and x % (width / 10) == 0:
-                callback((x * 100) / width)
-
-            seek_point = int(x * samples_per_pixel)
-
-            (spectral_centroid, db_spectrum) = processor.spectral_centroid(
-                seek_point)
-
-            spectrogram.draw_spectrum(x, db_spectrum)
-
-        if callback:
-            callback(100)
-
-        spectrogram.save(dst)
+        audiotospectrogram.drawSpectrogram(src, dst, fftSize = fft_size, progressCallback = callback)

    def thumbnail_spectrogram(self, src, dst, thumb_size):
        '''
@@ -111,31 +81,7 @@ class Python2AudioThumbnailer(object):

        th.save(dst)

-
-class DummyAudioThumbnailer(Python2AudioThumbnailer):
-    """A thumbnailer that just outputs a stock image.
-
-    The Python package used for audio spectrograms, "scikits.audiolab", does not
-    support Python 3 and is a constant source of problems for people installing
-    MediaGoblin. Until the feature is rewritten, this thumbnailer class simply
-    provides a generic image.
-
-    TODO: Consider Python 3 compatible interfaces to libsndfile, such as
-    https://pypi.python.org/pypi/PySoundFile/0.9.0.post1 as discussed here
-    https://issues.mediagoblin.org/ticket/5467#comment:6
-
-    """
-    def spectrogram(self, src, dst, **kw):
-        # Using PIL here in case someone wants to swap out the image for a PNG.
-        # This will convert to JPEG, where simply copying the file won't.
-        img = Image.open('mediagoblin/static/images/media_thumbs/video.jpg')
-        img.save(dst)
-
-
-# Due to recurring problems with spectrograms under Python 2, and the fact we're
-# soon dropping Python 2 support, we're disabling spectrogram thumbnails. See #5594.
-AudioThumbnailer = DummyAudioThumbnailer
-
+AudioThumbnailer = Python3AudioThumbnailer

 class AudioTranscoder(object):
    def __init__(self):
--- a/mediagoblin/tests/test_audio.py
+++ b/mediagoblin/tests/test_audio.py
@@ -25,7 +25,6 @@ import imghdr
 #os.environ['GST_DEBUG'] = '4,python:4'

 pytest.importorskip("gi.repository.Gst")
-pytest.importorskip("scikits.audiolab")
 import gi
 gi.require_version('Gst', '1.0')
 from gi.repository import Gst
--- a/setup.py
+++ b/setup.py
@@ -72,6 +72,8 @@ install_requires = [
    'PyLD<2.0.0', # Breaks a Python 3 test if >= 2.0.0.
    'ExifRead>=2.0.0',
    'email-validator', # Seems that WTForms must have dropped this.
+    'soundfile<=0.10.999', # Tested with 0.10.3.post1
+
    # This is optional:
    # 'translitcodec',
    # For now we're expecting that users will install this from
				`@@ -1 +0,0 @@`
				`../../../extlib/freesound/audioprocessing.py`