Reinstate audio spectrograms on Python 3 [#5610].
The `audiotospectrogram` module is a complete rewrite of the existing spectrogram code with support for Python 3. This allows us to drop the bundled `freesound` library and Python 2-only `audioprocessing` and `spectrogram` modules. Signed-off-by: Ben Sturmfels <ben@sturm.com.au>
This commit is contained in:
parent
83429a8658
commit
c2e93da0ce
@ -81,7 +81,8 @@ gstreamer1.0-plugins-bad \
|
|||||||
gstreamer1.0-plugins-base \
|
gstreamer1.0-plugins-base \
|
||||||
gstreamer1.0-plugins-good \
|
gstreamer1.0-plugins-good \
|
||||||
gstreamer1.0-plugins-ugly \
|
gstreamer1.0-plugins-ugly \
|
||||||
python3-gst-1.0
|
python3-gst-1.0 \
|
||||||
|
python3-numpy
|
||||||
|
|
||||||
# Install video dependencies.
|
# Install video dependencies.
|
||||||
RUN apt-get install -y \
|
RUN apt-get install -y \
|
||||||
|
@ -43,6 +43,7 @@ which
|
|||||||
# gstreamer1.0-plugins-good \
|
# gstreamer1.0-plugins-good \
|
||||||
# gstreamer1.0-plugins-ugly \
|
# gstreamer1.0-plugins-ugly \
|
||||||
# python3-gst-1.0 \
|
# python3-gst-1.0 \
|
||||||
|
# python3-numpy
|
||||||
|
|
||||||
# RUN apt-get install -y \
|
# RUN apt-get install -y \
|
||||||
# gir1.2-gst-plugins-base-1.0 \
|
# gir1.2-gst-plugins-base-1.0 \
|
||||||
|
@ -92,10 +92,11 @@ as whatever GStreamer plugins you want, good/bad/ugly):
|
|||||||
|
|
||||||
# Debian and co.
|
# Debian and co.
|
||||||
sudo apt install python3-gst-1.0 gstreamer1.0-plugins-{base,bad,good,ugly} \
|
sudo apt install python3-gst-1.0 gstreamer1.0-plugins-{base,bad,good,ugly} \
|
||||||
gstreamer1.0-libav
|
gstreamer1.0-libav python3-numpy
|
||||||
|
|
||||||
# Fedora and co.
|
# Fedora and co.
|
||||||
sudo dnf install gstreamer1-plugins-{base,bad-free,good,ugly-free}
|
sudo dnf install gstreamer1-plugins-{base,bad-free,good,ugly-free} \
|
||||||
|
python3-numpy
|
||||||
|
|
||||||
Add ``[[mediagoblin.media_types.audio]]`` under the ``[plugins]`` section in your
|
Add ``[[mediagoblin.media_types.audio]]`` under the ``[plugins]`` section in your
|
||||||
``mediagoblin.ini`` and update MediaGoblin::
|
``mediagoblin.ini`` and update MediaGoblin::
|
||||||
|
@ -30,6 +30,7 @@ carefully, or at least skim over it.
|
|||||||
**Improvements:**
|
**Improvements:**
|
||||||
|
|
||||||
- Drop Python 2 installation support (Ben Sturmfels)
|
- Drop Python 2 installation support (Ben Sturmfels)
|
||||||
|
- Reinstate Python 3 audio spectrograms [#5610] (Fernando Gutierrez)
|
||||||
|
|
||||||
**Bug fixes:**
|
**Bug fixes:**
|
||||||
|
|
||||||
|
@ -1,616 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# processing.py -- various audio processing functions
|
|
||||||
# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
|
|
||||||
# UNIVERSITAT POMPEU FABRA
|
|
||||||
#
|
|
||||||
# This program is free software: you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU Affero General Public License as
|
|
||||||
# published by the Free Software Foundation, either version 3 of the
|
|
||||||
# License, or (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU Affero General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
#
|
|
||||||
# Authors:
|
|
||||||
# Bram de Jong <bram.dejong at domain.com where domain in gmail>
|
|
||||||
# 2012, Joar Wandborg <first name at last name dot se>
|
|
||||||
|
|
||||||
from PIL import Image, ImageDraw, ImageColor #@UnresolvedImport
|
|
||||||
from functools import partial
|
|
||||||
import math
|
|
||||||
import numpy
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import signal
|
|
||||||
|
|
||||||
|
|
||||||
def get_sound_type(input_filename):
    """Return the normalised, lower-case extension of *input_filename*.

    The leading dot is removed and the short forms ``fla`` and ``aif`` are
    expanded to ``flac`` and ``aiff``. Any other extension is returned as-is;
    a name without an extension yields an empty string.
    """
    aliases = {"fla": "flac", "aif": "aiff"}
    _, extension = os.path.splitext(input_filename.lower())
    extension = extension.strip(".")
    return aliases.get(extension, extension)
|
|
||||||
|
|
||||||
|
|
||||||
try:
    import scikits.audiolab as audiolab
except ImportError:
    # audiolab is optional at import time, but every function below that
    # opens a sound file through it will fail without it.
    # The call form of print is valid on both Python 2 and Python 3.
    print("WARNING: audiolab is not installed so wav2png will not work")
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
class AudioProcessingException(Exception):
    """Error raised by this module when audio cannot be processed or converted."""
|
|
||||||
|
|
||||||
class TestAudioFile(object):
    """Stand-in for ``audiolab.Sndfile`` that generates noise instead of reading a file.

    It can also be told to have a "broken" header, in which case reading
    beyond the first half of the file raises ``RuntimeError``. Useful as well
    for exercising ultra-short files of e.g. 20 samples.

    Attributes mirror the ones the rest of the module reads from a sound
    file: ``nframes``, ``samplerate`` (always 44100) and ``channels``
    (always 1).
    """

    # The class name starts with "Test"; tell test collectors it is a helper.
    __test__ = False

    def __init__(self, num_frames, has_broken_header=False):
        self.seekpoint = 0
        self.nframes = num_frames
        self.samplerate = 44100
        self.channels = 1
        self.has_broken_header = has_broken_header

    def seek(self, seekpoint):
        """Move the read position to frame *seekpoint*."""
        self.seekpoint = seekpoint

    def read_frames(self, frames_to_read):
        """Return up to *frames_to_read* random samples in the range [-1, 1).

        Fewer samples (possibly none) are returned when the end of the file
        is reached. Raises ``RuntimeError`` when the header is "broken" and
        the read would extend past the middle of the file.
        """
        end_of_read = self.seekpoint + frames_to_read
        # 2 * end > nframes is the integer-safe form of end > nframes / 2.
        if self.has_broken_header and 2 * end_of_read > self.nframes:
            raise RuntimeError()

        # The frame count is stored in ``nframes``; clamp at zero so reading
        # at or beyond the end yields an empty array instead of an error.
        frames_left = max(0, self.nframes - self.seekpoint)
        will_read = max(0, min(frames_left, frames_to_read))
        self.seekpoint += will_read
        return numpy.random.random(will_read) * 2 - 1
|
|
||||||
|
|
||||||
|
|
||||||
def get_max_level(filename):
    """Return the largest absolute sample value in the sound file *filename*.

    The file is read in blocks of 4096 frames. Only the first (left) channel
    of multi-channel files is examined. If reading fails with
    ``RuntimeError`` (which can happen with a broken header) the scan stops
    and the maximum found so far is returned; 0 is returned when nothing
    could be read.
    """
    max_value = 0
    buffer_size = 4096
    audio_file = audiolab.Sndfile(filename, 'r')

    try:
        n_samples_left = audio_file.nframes

        while n_samples_left > 0:
            to_read = min(buffer_size, n_samples_left)

            try:
                samples = audio_file.read_frames(to_read)
            except RuntimeError:
                # this can happen with a broken header
                break

            # convert to mono by selecting left channel only
            if audio_file.channels > 1:
                samples = samples[:, 0]

            # .max() raises on an empty array, so skip empty reads
            if len(samples):
                max_value = max(max_value, numpy.abs(samples).max())

            n_samples_left -= to_read
    finally:
        # always release the file handle, even if reading blew up
        audio_file.close()

    return max_value
|
|
||||||
|
|
||||||
class AudioProcessor(object):
    """
    The audio processor processes chunks of audio and calculates the spectral
    centroid and the peak samples in that chunk of audio.

    Only the first (left) channel of multi-channel files is used.
    """

    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
        """Open *input_filename* and prepare an FFT of *fft_size* samples.

        *window_function* is called with ``fft_size`` and must return the
        window that is multiplied onto every block before the FFT.
        """
        max_level = get_max_level(input_filename)

        self.audio_file = audiolab.Sndfile(input_filename, 'r')
        self.fft_size = fft_size
        self.window = window_function(self.fft_size)
        self.spectrum_range = None
        self.lower = 100
        self.higher = 22050
        self.lower_log = math.log10(self.lower)
        self.higher_log = math.log10(self.higher)
        self.clip = lambda val, low, high: min(high, max(low, val))

        # figure out what the maximum value is for an FFT doing the FFT of a DC signal
        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
        max_fft = (numpy.abs(fft)).max()
        # set the scale to normalized audio and normalized FFT
        self.scale = 1.0 / max_level / max_fft if max_level > 0 else 1

    def read(self, start, size, resize_if_less=False):
        """Read *size* samples starting at frame *start*.

        *start* may be negative (the first FFT window is centred on frame 0).
        If *resize_if_less* is true and fewer than *size* samples are
        available, the result is zero-padded (at the front for the part
        before frame 0, at the back for the part past the end) so that it
        always has *size* samples. Otherwise only the samples actually read
        are returned.

        If the underlying read raises ``RuntimeError`` (broken header) zeros
        are returned: *size* of them when resizing, two otherwise.
        """
        # number of zeros to add in front of the buffer
        add_to_start = 0

        if start < 0:
            # the requested window lies entirely before the first frame
            if size + start <= 0:
                return numpy.zeros(size) if resize_if_less else numpy.array([])

            self.audio_file.seek(0)
            add_to_start = -start  # remember: start is negative!
            to_read = min(size + start, self.audio_file.nframes)
        else:
            self.audio_file.seek(start)
            to_read = min(size, self.audio_file.nframes - start)

        # never ask the file for a negative number of frames
        to_read = max(0, to_read)

        try:
            samples = self.audio_file.read_frames(to_read)
        except RuntimeError:
            # this can happen for wave files with broken headers...
            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)

        # convert to mono by selecting left channel only
        if self.audio_file.channels > 1:
            samples = samples[:, 0]

        if resize_if_less:
            if add_to_start > 0:
                # the arrays are 1-D, so they are joined along the default axis 0
                samples = numpy.concatenate((numpy.zeros(add_to_start), samples))

            missing = size - len(samples)
            if missing > 0:
                samples = numpy.concatenate((samples, numpy.zeros(missing)))

        return samples

    def spectral_centroid(self, seek_point, spec_range=110.0):
        """Analyse the ``fft_size`` samples centred on *seek_point*.

        Returns ``(spectral_centroid, db_spectrum)``. The centroid is clipped
        to the ``lower``..``higher`` frequency range and mapped
        logarithmically onto 0..1 (0 for silence). ``db_spectrum`` is the
        magnitude spectrum in dB, with the range ``-spec_range`` dB .. 0 dB
        mapped onto 0..1.
        """
        # floor division keeps the seek position an integer
        samples = self.read(seek_point - self.fft_size // 2, self.fft_size, True)

        samples = samples * self.window
        fft = numpy.fft.rfft(samples)
        spectrum = self.scale * numpy.abs(fft)  # normalized abs(FFT) between 0 and 1
        length = numpy.float64(spectrum.shape[0])

        # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
        db_spectrum = ((20 * (numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range) / spec_range

        energy = spectrum.sum()
        spectral_centroid = 0

        if energy > 1e-60:
            # The bin-index vector is cached after the first call. It must be
            # tested with "is None": comparing an array with == is elementwise.
            if self.spectrum_range is None:
                self.spectrum_range = numpy.arange(length)

            spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5

            # clip > log10 > scale between 0 and 1
            clipped = min(self.higher, max(self.lower, spectral_centroid))
            spectral_centroid = (math.log10(clipped) - self.lower_log) / (self.higher_log - self.lower_log)

        return (spectral_centroid, db_spectrum)

    def peaks(self, start_seek, end_seek):
        """Find the minimum and maximum sample between *start_seek* and *end_seek*.

        The range is clamped to the file. The pair is returned in the order
        the two peaks occur: ``(min, max)`` if the minimum comes first, else
        ``(max, min)``. For an empty range the single sample at *start_seek*
        is returned twice (or zeros when no sample can be read).
        """
        # larger blocksizes are faster but take more mem...
        block_size = 4096

        max_index = -1
        max_value = -1
        min_index = -1
        min_value = 1

        start_seek = max(start_seek, 0)
        end_seek = min(end_seek, self.audio_file.nframes)

        if end_seek <= start_seek:
            samples = self.read(start_seek, 1)
            if len(samples) == 0:
                return (0.0, 0.0)
            return (samples[0], samples[0])

        block_size = min(block_size, end_seek - start_seek)

        for i in range(start_seek, end_seek, block_size):
            # the last block is shortened so nothing past end_seek is inspected
            samples = self.read(i, min(block_size, end_seek - i))
            if len(samples) == 0:
                continue

            local_max_index = numpy.argmax(samples)
            local_max_value = samples[local_max_index]

            if local_max_value > max_value:
                max_value = local_max_value
                # store the absolute position so blocks can be compared
                max_index = i + local_max_index

            local_min_index = numpy.argmin(samples)
            local_min_value = samples[local_min_index]

            if local_min_value < min_value:
                min_value = local_min_value
                min_index = i + local_min_index

        return (min_value, max_value) if min_index < max_index else (max_value, min_value)
|
|
||||||
|
|
||||||
|
|
||||||
def interpolate_colors(colors, flat=False, num_colors=256):
    """Expand a short list of colors into a palette of *num_colors* entries.

    The palette is built by linear interpolation between neighbouring
    entries of *colors*; the first and last palette entries are the first and
    last colors. Only the first three components (r, g, b) of each color are
    used, so any alpha component is dropped.

    If *flat* is true a flat list of numbers ``[r, g, b, r, g, b, ...]`` is
    returned, otherwise a list of ``(r, g, b)`` tuples. *colors* may be any
    iterable. With ``num_colors == 1`` the single entry is the first color.
    """
    # materialise so len() and indexing work for iterators such as map()
    colors = list(colors)
    last = len(colors) - 1
    palette = []

    for i in range(num_colors):
        # a one-entry palette has no span to interpolate over (and would
        # otherwise divide by zero)
        position = (i * last) / (num_colors - 1.0) if num_colors > 1 else 0.0
        lower = int(position)
        alpha = position - float(lower)

        if alpha > 0:
            start, end = colors[lower], colors[lower + 1]
            rgb = tuple(
                int((1.0 - alpha) * start[channel] + alpha * end[channel])
                for channel in range(3)
            )
        else:
            # exactly on an input color: no neighbour needed
            rgb = tuple(int(colors[lower][channel]) for channel in range(3))

        if flat:
            palette.extend(rgb)
        else:
            palette.append(rgb)

    return palette
|
|
||||||
|
|
||||||
|
|
||||||
def desaturate(rgb, amount):
    """
    Move every channel of *rgb* towards the mean of the channels.

    amount == 0 leaves the color unchanged, amount == 1 yields grey.
    The result is a tuple of ints.
    """
    luminosity = sum(rgb) / 3.0
    return tuple(int(channel - amount * (channel - luminosity)) for channel in rgb)
|
|
||||||
|
|
||||||
|
|
||||||
class WaveformImage(object):
    """
    Given peaks and spectral centroids from the AudioProcessor, this class will construct
    a waveform image which can be saved as PNG.
    """

    def __init__(self, image_width, image_height, palette=1):
        """Create an empty image of the given size.

        *image_height* must be odd. *palette* selects one of four built-in
        color schemes (1-4); 3 and 4 are desaturated variants on a light
        background. Raises ``AudioProcessingException`` for an even height or
        an unknown palette.
        """
        if image_height % 2 == 0:
            raise AudioProcessingException("Height should be uneven: images look much better at uneven height")

        # Palettes are built as real lists: interpolate_colors needs len().
        if palette == 1:
            background_color = (0, 0, 0)
            colors = [
                (50, 0, 200),
                (0, 220, 80),
                (255, 224, 0),
                (255, 70, 0),
            ]
        elif palette == 2:
            background_color = (0, 0, 0)
            colors = [self.color_from_value(value / 29.0) for value in range(0, 30)]
        elif palette == 3:
            background_color = (213, 217, 221)
            colors = [
                desaturate(color, amount=0.7)
                for color in [
                    (50, 0, 200),
                    (0, 220, 80),
                    (255, 224, 0),
                ]
            ]
        elif palette == 4:
            background_color = (213, 217, 221)
            colors = [
                desaturate(self.color_from_value(value / 29.0), amount=0.8)
                for value in range(0, 30)
            ]
        else:
            # previously this fell through to an unbound-variable error
            raise AudioProcessingException("Unknown palette: %r" % (palette,))

        self.image = Image.new("RGB", (image_width, image_height), background_color)

        self.image_width = image_width
        self.image_height = image_height

        self.draw = ImageDraw.Draw(self.image)
        self.previous_x, self.previous_y = None, None

        self.color_lookup = interpolate_colors(colors)
        self.pix = self.image.load()

    def color_from_value(self, value):
        """ given a value between 0 and 1, return an (r,g,b) tuple """
        return ImageColor.getrgb("hsl(%d,%d%%,%d%%)" % (int((1.0 - value) * 360), 80, 50))

    def draw_peaks(self, x, peaks, spectral_centroid):
        """ draw 2 peaks at x using the spectral_centroid for color """
        y1 = self.image_height * 0.5 - peaks[0] * (self.image_height - 4) * 0.5
        y2 = self.image_height * 0.5 - peaks[1] * (self.image_height - 4) * 0.5

        line_color = self.color_lookup[int(spectral_centroid * 255.0)]

        if self.previous_y is not None:
            # connect to the end of the previous column's line
            self.draw.line([self.previous_x, self.previous_y, x, y1, x, y2], line_color)
        else:
            self.draw.line([x, y1, x, y2], line_color)

        self.previous_x, self.previous_y = x, y2

        self.draw_anti_aliased_pixels(x, y1, y2, line_color)

    def draw_anti_aliased_pixels(self, x, y1, y2, color):
        """ vertical anti-aliasing at y1 and y2 """
        y_max = max(y1, y2)
        y_max_int = int(y_max)
        alpha = y_max - y_max_int

        # blend the pixel just below the lower end of the line
        if alpha > 0.0 and alpha < 1.0 and y_max_int + 1 < self.image_height:
            current_pix = self.pix[x, y_max_int + 1]

            r = int((1 - alpha) * current_pix[0] + alpha * color[0])
            g = int((1 - alpha) * current_pix[1] + alpha * color[1])
            b = int((1 - alpha) * current_pix[2] + alpha * color[2])

            self.pix[x, y_max_int + 1] = (r, g, b)

        y_min = min(y1, y2)
        y_min_int = int(y_min)
        alpha = 1.0 - (y_min - y_min_int)

        # blend the pixel just above the upper end of the line
        if alpha > 0.0 and alpha < 1.0 and y_min_int - 1 >= 0:
            current_pix = self.pix[x, y_min_int - 1]

            r = int((1 - alpha) * current_pix[0] + alpha * color[0])
            g = int((1 - alpha) * current_pix[1] + alpha * color[1])
            b = int((1 - alpha) * current_pix[2] + alpha * color[2])

            self.pix[x, y_min_int - 1] = (r, g, b)

    def save(self, filename):
        """Brighten the horizontal centre ("zero") line and write the image to *filename*."""
        a = 25
        # pixel coordinates must be integers, hence floor division
        middle = self.image_height // 2
        for x in range(self.image_width):
            self.pix[x, middle] = tuple(p + a for p in self.pix[x, middle])

        self.image.save(filename)
|
|
||||||
|
|
||||||
|
|
||||||
class SpectrogramImage(object):
    """
    Given spectra from the AudioProcessor, this class will construct a
    spectrogram image which can be saved as JPEG.
    """

    def __init__(self, image_width, image_height, fft_size):
        """Prepare a spectrogram of the given size for FFTs of *fft_size* samples."""
        self.image_width = image_width
        self.image_height = image_height
        self.fft_size = fft_size

        # Width and height are swapped on purpose: pixels are collected one
        # column at a time and the image is rotated when it is saved.
        self.image = Image.new("RGBA", (image_height, image_width))

        # floor division keeps the palette entries integers on Python 3 too
        colors = [
            (0, 0, 0, 0),
            (58 // 4, 68 // 4, 65 // 4, 255),
            (80 // 2, 100 // 2, 153 // 2, 255),
            (90, 180, 100, 255),
            (224, 224, 44, 255),
            (255, 60, 30, 255),
            (255, 255, 255, 255)
        ]
        self.palette = interpolate_colors(colors)

        # generate the lookup which translates y-coordinate to fft-bin
        self.y_to_bin = []
        f_min = 100.0
        f_max = 22050.0
        y_min = math.log10(f_min)
        y_max = math.log10(f_max)
        half_fft = self.fft_size // 2
        # guard against a one-pixel-high image dividing by zero
        y_span = max(image_height - 1.0, 1.0)
        for y in range(self.image_height):
            freq = math.pow(10.0, y_min + y / y_span * (y_max - y_min))
            fft_bin = freq / 22050.0 * (half_fft + 1)

            if fft_bin < half_fft:
                # fractional part, scaled to 0..255, weights the next bin
                alpha = fft_bin - int(fft_bin)
                self.y_to_bin.append((int(fft_bin), alpha * 255))

        # this is a bit strange, but using image.load()[x,y] = ... is
        # a lot slower than using image.putdata and then rotating the image
        # so we store all the pixels in an array and then create the image when saving
        self.pixels = []

    def draw_spectrum(self, x, spectrum):
        """Append one image column for *spectrum* (values expected in 0..1).

        *x* is accepted for symmetry with ``WaveformImage.draw_peaks`` but is
        not used: columns are stored in the order they are drawn.
        """
        # for all frequencies, draw the pixels
        for (index, alpha) in self.y_to_bin:
            self.pixels.append(self.palette[int((255.0 - alpha) * spectrum[index] + alpha * spectrum[index + 1])])

        # if the FFT is too small to fill up the image, fill with black to the top
        for y in range(len(self.y_to_bin), self.image_height):
            self.pixels.append(self.palette[0])

    def save(self, filename, quality=80):
        """Write the spectrogram to *filename*, which must end in ``.jpg``."""
        assert filename.lower().endswith(".jpg"), "spectrogram must be saved as .jpg"
        self.image.putdata(self.pixels)
        # JPEG has no alpha channel; convert so the RGBA image can be written
        self.image.transpose(Image.ROTATE_90).convert("RGB").save(filename, quality=quality)
|
|
||||||
|
|
||||||
|
|
||||||
def create_wave_images(input_filename, output_filename_w, output_filename_s, image_width, image_height, fft_size, progress_callback=None):
    """
    Utility function for creating both waveform and spectrum images from an audio input file.

    The waveform is written to *output_filename_w* and the spectrogram to
    *output_filename_s*. If given, *progress_callback* is called with an
    integer percentage roughly every tenth of the image width and finally
    with 100.
    """
    processor = AudioProcessor(input_filename, fft_size, numpy.hanning)
    try:
        samples_per_pixel = processor.audio_file.nframes / float(image_width)

        waveform = WaveformImage(image_width, image_height)
        spectrogram = SpectrogramImage(image_width, image_height, fft_size)

        # at least 1, so images narrower than 10 pixels do not divide by zero
        progress_step = max(1, image_width // 10)

        for x in range(image_width):
            if progress_callback and x % progress_step == 0:
                progress_callback((x * 100) // image_width)

            seek_point = int(x * samples_per_pixel)
            next_seek_point = int((x + 1) * samples_per_pixel)

            (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
            peaks = processor.peaks(seek_point, next_seek_point)

            waveform.draw_peaks(x, peaks, spectral_centroid)
            spectrogram.draw_spectrum(x, db_spectrum)
    finally:
        # the processor keeps the sound file open; release it in every case
        processor.audio_file.close()

    if progress_callback:
        progress_callback(100)

    waveform.save(output_filename_w)
    spectrogram.save(output_filename_s)
|
|
||||||
|
|
||||||
|
|
||||||
class NoSpaceLeftException(Exception):
    """Raised when an external converter reports "No space left on device"."""
|
|
||||||
|
|
||||||
def convert_to_pcm(input_filename, output_filename):
    """
    Decode an mp3, ogg or flac file to PCM audio using the matching
    command line tool (``lame``, ``oggdec`` or ``flac``).

    Returns True when the file was converted and False when the file type
    (taken from the extension) is not one of the three handled here.

    Raises AudioProcessingException if the input file does not exist or the
    conversion fails, and NoSpaceLeftException if the tool reports that the
    device is full.
    """
    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    sound_type = get_sound_type(input_filename)

    if sound_type == "mp3":
        cmd = ["lame", "--decode", input_filename, output_filename]
    elif sound_type == "ogg":
        cmd = ["oggdec", input_filename, "-o", output_filename]
    elif sound_type == "flac":
        cmd = ["flac", "-f", "-d", "-s", "-o", output_filename, input_filename]
    else:
        return False

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()
    # The pipes deliver bytes; decode leniently so the output can be searched
    # and embedded in the error message.
    stdout = stdout.decode("utf-8", "replace")
    stderr = stderr.decode("utf-8", "replace")

    if process.returncode != 0 or not os.path.exists(output_filename):
        if "No space left on device" in stderr + " " + stdout:
            raise NoSpaceLeftException
        raise AudioProcessingException("failed converting to pcm data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)

    return True
|
|
||||||
|
|
||||||
|
|
||||||
def stereofy_and_find_info(stereofy_executble_path, input_filename, output_filename):
    """
    Run the stereofy executable on a pcm wave file (converting it to two
    channel, 16 bit integer) and collect the information it prints.

    Returns a dict with the keys ``duration``, ``channels``, ``samplerate``,
    ``bitrate`` and ``bitdepth``. Values are parsed from ``#name value``
    markers in the tool's output; a missing marker yields 0 (``None`` for
    the bit depth). The bitrate is derived from the input file size and the
    duration, and is 0 when no duration was reported.

    Raises AudioProcessingException if the input file does not exist or the
    tool fails, and NoSpaceLeftException if it reports that the device is
    full.
    """
    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    cmd = [stereofy_executble_path, "--input", input_filename, "--output", output_filename]

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()
    # The pipes deliver bytes; decode leniently so the text patterns below
    # and the string concatenations work.
    stdout = stdout.decode("utf-8", "replace")
    stderr = stderr.decode("utf-8", "replace")

    if process.returncode != 0 or not os.path.exists(output_filename):
        if "No space left on device" in stderr + " " + stdout:
            raise NoSpaceLeftException
        raise AudioProcessingException("failed calling stereofy data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)

    # search both streams as one line
    output = (stdout + " " + stderr).replace("\n", " ")

    def find_number(pattern, group, default):
        """Return the named group of *pattern* in the output as a float, or *default*."""
        m = re.match(pattern, output)
        if m is None:
            return default
        return float(m.group(group))

    duration = find_number(r".*#duration (?P<duration>[\d\.]+).*", "duration", 0)
    channels = find_number(r".*#channels (?P<channels>\d+).*", "channels", 0)
    samplerate = find_number(r".*#samplerate (?P<samplerate>\d+).*", "samplerate", 0)
    bitdepth = find_number(r".*#bitdepth (?P<bitdepth>\d+).*", "bitdepth", None)

    bitrate = (os.path.getsize(input_filename) * 8.0) / 1024.0 / duration if duration > 0 else 0

    return dict(duration=duration, channels=channels, samplerate=samplerate, bitrate=bitrate, bitdepth=bitdepth)
|
|
||||||
|
|
||||||
|
|
||||||
def convert_to_mp3(input_filename, output_filename, quality=70):
    """
    Convert the incoming wave file to an mp3 file with ``lame``.

    *quality* is passed to lame's ``--abr`` option.

    Raises AudioProcessingException if the input file does not exist or
    the conversion fails; the message carries the tool's output.
    """
    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["lame", "--silent", "--abr", str(quality), input_filename, output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        # Report both streams as text: diagnostics usually go to stderr, and
        # the pipes deliver bytes.
        output = stdout.decode("utf-8", "replace") + "\n" + stderr.decode("utf-8", "replace")
        raise AudioProcessingException(output.strip())
|
|
||||||
|
|
||||||
def convert_to_ogg(input_filename, output_filename, quality=1):
    """
    Convert the incoming wave file to an ogg file with ``oggenc``.

    *quality* is passed to oggenc's ``-q`` option.

    Raises AudioProcessingException if the input file does not exist or
    the conversion fails; the message carries the tool's output.
    """
    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["oggenc", "-q", str(quality), input_filename, "-o", output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        # Report both streams as text: diagnostics usually go to stderr, and
        # the pipes deliver bytes.
        output = stdout.decode("utf-8", "replace") + "\n" + stderr.decode("utf-8", "replace")
        raise AudioProcessingException(output.strip())
|
|
||||||
|
|
||||||
def convert_using_ffmpeg(input_filename, output_filename):
    """
    Convert the incoming audio file to mono, 16 bit, 44100 Hz pcm using ffmpeg.

    ffmpeg is given three minutes; the limit is enforced with SIGALRM.

    Raises AudioProcessingException if the input file does not exist, if
    ffmpeg times out (the process is killed) or if the conversion fails.
    """
    TIMEOUT = 3 * 60

    def alarm_handler(signum, frame):
        raise AudioProcessingException("timeout while waiting for ffmpeg")

    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["ffmpeg", "-y", "-i", input_filename, "-ac", "1", "-acodec", "pcm_s16le", "-ar", "44100", output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    previous_handler = signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(TIMEOUT)
    try:
        (stdout, stderr) = process.communicate()
    except AudioProcessingException:
        # timed out: do not leave ffmpeg running in the background
        process.kill()
        process.wait()
        raise
    finally:
        # always cancel the pending alarm and put the old handler back
        signal.alarm(0)
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)

    if process.returncode != 0 or not os.path.exists(output_filename):
        # Report both streams as text: ffmpeg writes its diagnostics to
        # stderr, and the pipes deliver bytes.
        output = stdout.decode("utf-8", "replace") + "\n" + stderr.decode("utf-8", "replace")
        raise AudioProcessingException(output.strip())
|
|
@ -1 +0,0 @@
|
|||||||
../../../extlib/freesound/audioprocessing.py
|
|
297
mediagoblin/media_types/audio/audiotospectrogram.py
Normal file
297
mediagoblin/media_types/audio/audiotospectrogram.py
Normal file
@ -0,0 +1,297 @@
|
|||||||
|
# GNU MediaGoblin -- federated, autonomous media hosting
|
||||||
|
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
import soundfile
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
# Old spectrogram.py sets the upper limit to 22050, but usually there isn't
# much detail in the higher frequencies.
SPECTROGRAM_MAX_FREQUENCY = 8000
SPECTROGRAM_MIN_FREQUENCY = 20
SPECTROGRAM_DB_RANGE = 110

# Color palette copied from old spectrogram.py
SPECTROGRAM_COLORS = [
    (58 / 4, 68 / 4, 65 / 4),
    (80 / 2, 100 / 2, 153 / 2),
    (90, 180, 100),
    (224, 224, 44),
    (255, 60, 30),
    (255, 255, 255),
]

# The purpose of this table is to give more horizontal real estate to
# shorter sound files.
# Format: (pixels, (range_min, range_max))
# For sounds with a duration >= _range_min_ and < _range_max_,
# give _pixels_ horizontal pixels for each second of audio.
SPECTROGRAM_WIDTH_PERSECOND = [
    (240, (0, 20)),
    (120, (20, 30)),
    (60, (30, 60)),
    (30, (60, 120)),
    (15, (120, 240)),
    # The upper limit is arbitrary. Sounds with a longer duration will still
    # get assigned to the last bucket.
    (6, (240, 100000)),
]

SPECTROGRAM_HEIGHT = 500
|
||||||
|
|
||||||
|
class AudioBlocksFFT:
    """
    Iterate over an audio file block by block, yielding FFT amplitudes.

    The file is opened with soundfile.SoundFile. Iterating over an instance
    rewinds the file and yields one (amplitudes, positionSeconds) tuple per
    block, where amplitudes only covers the minFreq..maxFreq range and is
    optionally resized to numBins values.

    The instance can be used as a context manager, or close() can be called,
    to release the underlying file handle.
    """

    def __init__(self, fileName, blockSize, overlap, minFreq, maxFreq, numBins = None, windowFunction = numpy.hanning):
        """
        fileName       -- audio file to open for reading
        blockSize      -- number of samples per FFT block
        overlap        -- number of samples shared by consecutive blocks
        minFreq        -- lowest frequency kept in the output
        maxFreq        -- highest frequency kept in the output
        numBins        -- if not None, resize every amplitude array to this length
        windowFunction -- callable returning blockSize window coefficients
        """
        self.audioData = soundfile.SoundFile(fileName, 'r')
        self.numChannels = self.audioData.channels
        self.sampleRate = self.audioData.samplerate
        self.minFreq = minFreq
        self.maxFreq = maxFreq
        self.blockSize = blockSize
        self.numBins = numBins
        self.overlap = overlap
        self.windowValues = windowFunction(blockSize)
        self.peakFFTValue = 0
        try:
            # PySoundFile V0.10.0 adds SoundFile.frames property and deprecates __len__()
            self.totalSamples = self.audioData.frames
        except AttributeError:
            self.totalSamples = len(self.audioData)

    def close(self):
        """
        Close the underlying sound file
        """
        self.audioData.close()

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, traceback):
        self.close()
        return False

    def peakFFTAmplitude(self):
        """
        Peak amplitude of FFT for all blocks

        Only meaningful once the instance has been iterated over; it is 0
        before that.
        """
        return self.peakFFTValue

    def totalSeconds(self):
        """
        Total length in seconds
        """
        return self.totalSamples / self.sampleRate

    def _filterFreqRange(self, fftAmplitude):
        """
        Given a FFT amplitudes array keep only bins between minFreq, maxFreq

        When maxFreq lies beyond the last available bin the array is padded
        with zeros so the result always reaches the bin of maxFreq.
        """
        nyquistFreq = self.sampleRate // 2
        numBins = len(fftAmplitude)
        sliceWidth = nyquistFreq / numBins
        startIdx = int(self.minFreq / sliceWidth)
        endIdx = int(self.maxFreq / sliceWidth)
        if numBins <= endIdx:
            fftAmplitude = numpy.pad(fftAmplitude, (0, 1 + endIdx - numBins), 'constant', constant_values=(0))
        else:
            fftAmplitude = fftAmplitude[:endIdx + 1]
        return fftAmplitude[startIdx:]

    def _resizeAmplitudeArray(self, amplitudeValues, newSize):
        """
        Resize amplitude values array

        Growing repeats source values (nearest lower index); shrinking keeps
        the maximum of each group of source values so peaks are preserved.
        """
        if len(amplitudeValues) == newSize:
            return amplitudeValues
        if newSize > len(amplitudeValues):
            # Resize up: map every destination index back onto a source index
            srcIdx = (numpy.arange(newSize) * len(amplitudeValues)) // newSize
            return numpy.asarray(amplitudeValues, dtype=float)[srcIdx]
        # Resize down keeping peaks
        return numpy.array(
            [chunk.max() for chunk in numpy.array_split(amplitudeValues, newSize)],
            dtype=float)

    def __iter__(self):
        """
        Read a block of audio data and compute FFT amplitudes

        Yields (fftAmplitude, positionSeconds) tuples and keeps
        peakFFTValue up to date with the largest amplitude seen so far.
        """
        self.audioData.seek(0)
        # always_2d=True guarantees a (frames, channels) array; without it
        # mono files are returned as 1-D arrays and the channel indexing
        # below raises IndexError.
        for fileBlock in self.audioData.blocks(blocksize = self.blockSize, overlap = self.overlap, always_2d = True):
            # Mix down all channels to mono
            audioBlock = fileBlock[:,0]
            for channel in range(1, self.numChannels):
                audioBlock = numpy.add(audioBlock, fileBlock[:,channel])
            # On the last block it may be necessary to pad with zeros
            if len(audioBlock) < self.blockSize:
                audioBlock = numpy.pad(audioBlock, (0, self.blockSize - len(audioBlock)), 'constant', constant_values=(0))
            # Compute FFT amplitude of this block
            fftAmplitude = self._filterFreqRange(numpy.abs(numpy.fft.rfft(audioBlock * self.windowValues)))
            self.peakFFTValue = max(self.peakFFTValue, fftAmplitude.max())
            # Resize if requested
            if self.numBins is not None:
                fftAmplitude = self._resizeAmplitudeArray(fftAmplitude, self.numBins)
            yield (fftAmplitude, self.audioData.tell() / self.sampleRate)
|
||||||
|
|
||||||
|
class SpectrogramColorMap:
    """
    Convert spectrogram columns of amplitude values into RGB pixel tuples.

    columnData is indexed as columnData[x][bin]; the palette is built from
    SPECTROGRAM_COLORS when the instance is created.
    """

    def __init__(self, columnData):
        self.columnData = columnData
        self.width = len(columnData)
        self.height = len(columnData[0])
        self._buildColorPalette()

    def _colorBetween(self, beginColor, endColor, step):
        """
        Blend beginColor into endColor; step is the weight of endColor.

        Channels are mixed on their squared values and capped at 255.
        """
        rS, gS, bS = beginColor
        rE, gE, bE = endColor
        blended = []
        for startValue, endValue in ((rS, rE), (gS, gE), (bS, bE)):
            mixed = int(numpy.sqrt((1.0 - step) * (startValue * startValue) + step * (endValue * endValue)))
            blended.append(min(mixed, 255))
        return tuple(blended)

    def _buildColorPalette(self):
        """
        Fill self.colors with 200 shades for every pair of consecutive
        colors in SPECTROGRAM_COLORS.
        """
        stops = SPECTROGRAM_COLORS
        self.colors = [
            self._colorBetween(lower, upper, shade / 200)
            for lower, upper in zip(stops[:-1], stops[1:])
            for shade in range(200)
        ]

    def getColorData(self, progressCallback = None):
        """
        Return the image as a flat, row-major list of palette colors.

        The last bin of each column ends up in the top row. When given,
        progressCallback is called after every column with the percentage
        of columns processed so far.
        """
        paletteSize = len(self.colors)
        pixels = [self.colors[0]] * (self.width * self.height)
        for column in range(self.width):
            values = self.columnData[column]
            for row in range(self.height):
                # Scale the amplitude onto the palette and clamp it to a valid index
                shade = int(paletteSize * values[self.height - row - 1])
                shade = min(max(shade, 0), paletteSize - 1)
                pixels[column + self.width * row] = self.colors[shade]
            if progressCallback:
                progressCallback(100 * column / self.width)
        return pixels
|
||||||
|
|
||||||
|
def drawSpectrogram(audioFileName, imageFileName, fftSize = 1024, fftOverlap = 0, progressCallback = None):
    """
    Draw a spectrogram of the audio file

    audioFileName    -- audio file handed to AudioBlocksFFT
    imageFileName    -- destination handed to Image.save()
    fftSize          -- number of samples per FFT block
    fftOverlap       -- number of samples shared by consecutive blocks
    progressCallback -- optional callable receiving the progress percentage;
                        it is always called with 100 once the image is saved
    """

    # Fraction of total work for each step
    STEP_PERCENTAGE_FFT = 40
    STEP_PERCENTAGE_NORMALIZE = 5
    STEP_PERCENTAGE_ACCUMULATE = 10
    STEP_PERCENTAGE_DRAW = 40
    # Give last 5% to saving the file

    PERCENTAGE_REPORT_STEP = 2

    nextReportedPercentage = PERCENTAGE_REPORT_STEP
    def wrapProgressCallback(percentage):
        # Only forward progress once it has advanced by at least one report step
        nonlocal nextReportedPercentage
        percentage = int(percentage)
        if percentage >= nextReportedPercentage:
            if progressCallback:
                progressCallback(percentage)
            nextReportedPercentage = (1 + percentage // PERCENTAGE_REPORT_STEP) * PERCENTAGE_REPORT_STEP

    def mapColorsProgressCallback(percentage):
        # Rescale the color mapping progress into its share of the total work
        wrapProgressCallback(STEP_PERCENTAGE_FFT + STEP_PERCENTAGE_NORMALIZE + STEP_PERCENTAGE_ACCUMULATE
                             + (STEP_PERCENTAGE_DRAW * (percentage / 100)))

    imageWidthLookup = SPECTROGRAM_WIDTH_PERSECOND
    imageHeight = SPECTROGRAM_HEIGHT

    # Load audio file and compute FFT amplitudes
    fftBlocksSource = AudioBlocksFFT(audioFileName,
                                     fftSize, overlap = fftOverlap,
                                     minFreq = SPECTROGRAM_MIN_FREQUENCY, maxFreq = SPECTROGRAM_MAX_FREQUENCY,
                                     numBins = imageHeight)
    soundLength = fftBlocksSource.totalSeconds()
    fftAmplitudeBlocks = []
    try:
        for fftAmplitude, positionSeconds in fftBlocksSource:
            fftAmplitudeBlocks.append(fftAmplitude)
            wrapProgressCallback(STEP_PERCENTAGE_FFT * (positionSeconds / soundLength))
    finally:
        # Every block is in memory now (or reading failed): release the file handle
        fftBlocksSource.audioData.close()

    totalProgress = STEP_PERCENTAGE_FFT

    # Normalize FFT amplitude and convert to log scale
    specRange = SPECTROGRAM_DB_RANGE
    peakAmplitude = fftBlocksSource.peakFFTAmplitude()
    if not peakAmplitude > 0:
        # Silent (or empty) audio: dividing by a zero peak would produce NaN
        # values that cannot be mapped to a color. Any positive divisor keeps
        # the all-zero amplitudes at the bottom of the scale.
        peakAmplitude = 1.0
    numBlocks = len(fftAmplitudeBlocks)
    for i in range(0, numBlocks):
        normalized = numpy.divide(fftAmplitudeBlocks[i], peakAmplitude)
        fftAmplitudeBlocks[i] = ((20*(numpy.log10(normalized + 1e-60))).clip(-specRange, 0.0) + specRange)/specRange
        wrapProgressCallback(totalProgress + STEP_PERCENTAGE_NORMALIZE * (i / numBlocks))

    totalProgress = totalProgress + STEP_PERCENTAGE_NORMALIZE

    # Compute spectrogram width in pixels
    # Default to the last bucket for sounds longer than every listed range
    imageWidthPerSecond = imageWidthLookup[-1][0]
    for widthPerSecond, lengthLimit in imageWidthLookup:
        limitLow, limitHigh = lengthLimit
        # Ranges are documented as limitLow <= duration < limitHigh
        if limitLow <= soundLength < limitHigh:
            imageWidthPerSecond = widthPerSecond
            break
    imageWidth = int(imageWidthPerSecond * soundLength)

    # Compute spectrogram values
    columnValues = numpy.zeros(imageHeight)
    spectrogram = []
    x = 0
    for idx in range(0, numBlocks):
        newX = (idx * imageWidth) // numBlocks
        if newX != x:
            # Save column
            spectrogram.append(numpy.copy(columnValues))
            x = newX
            columnValues.fill(0)
        # Blocks falling on the same column are merged by keeping the peak of each bin
        columnValues = numpy.maximum(columnValues, fftAmplitudeBlocks[idx])
        wrapProgressCallback(totalProgress + STEP_PERCENTAGE_ACCUMULATE * (idx / numBlocks))
    spectrogram.append(numpy.copy(columnValues))

    totalProgress = totalProgress + STEP_PERCENTAGE_ACCUMULATE

    # Draw spectrogram
    # The number of saved columns can differ from the computed width, so use the real count
    imageWidth = len(spectrogram)
    colorData = SpectrogramColorMap(spectrogram).getColorData(progressCallback = mapColorsProgressCallback)

    totalProgress = totalProgress + STEP_PERCENTAGE_DRAW

    # Save final image
    image = Image.new('RGB', (imageWidth, imageHeight))
    image.putdata(colorData)
    image.save(imageFileName)

    if progressCallback:
        progressCallback(100)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command line entry point: render the spectrogram of one audio file.

    import sys

    def printProgress(p):
        # Rewrite the same terminal line on every update
        sys.stdout.write("\rProgress : {}%".format(p))
        sys.stdout.flush()

    if len(sys.argv) not in (2, 3):
        print("Usage:\n{0} input_file [output_file]".format(sys.argv[0]))
        # sys.exit is always available (the bare exit() helper comes from the
        # site module); a non-zero status signals the usage error to callers.
        sys.exit(1)

    audioFile = sys.argv[1]

    # The output file name is optional
    if len(sys.argv) == 3:
        outputFile = sys.argv[2]
    else:
        outputFile = 'spectrogram.png'

    sys.stdout.write("Input : {0}\nOutput : {1}\n".format(audioFile, outputFile))
    drawSpectrogram(audioFile, outputFile, progressCallback = printProgress)
    sys.stdout.write("\nDone!\n")
|
@ -1,362 +0,0 @@
|
|||||||
# processing.py -- various audio processing functions
|
|
||||||
# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
|
|
||||||
# UNIVERSITAT POMPEU FABRA
|
|
||||||
#
|
|
||||||
# This program is free software: you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU Affero General Public License as
|
|
||||||
# published by the Free Software Foundation, either version 3 of the
|
|
||||||
# License, or (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU Affero General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
#
|
|
||||||
# Authors:
|
|
||||||
# Bram de Jong <bram.dejong at domain.com where domain in gmail>
|
|
||||||
# 2012, Joar Wandborg <first name at last name dot se>
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
try:
|
|
||||||
from PIL import Image
|
|
||||||
except ImportError:
|
|
||||||
import Image
|
|
||||||
import math
|
|
||||||
import numpy
|
|
||||||
|
|
||||||
try:
|
|
||||||
import scikits.audiolab as audiolab
|
|
||||||
except ImportError:
|
|
||||||
print("WARNING: audiolab is not installed so wav2png will not work")
|
|
||||||
|
|
||||||
|
|
||||||
class AudioProcessingException(Exception):
    """Error type for failures raised by the audio processing code."""
|
|
||||||
|
|
||||||
|
|
||||||
class SpectrogramImage(object):
    """
    Collects spectrum columns as palette pixels and saves them as an image.

    Rows are spaced logarithmically between 100 and 22050 (the frequency
    range hard-coded in __init__).
    """

    def __init__(self, image_size, fft_size):
        """
        image_size -- (width, height) of the final image
        fft_size   -- FFT size the spectra handed to draw_spectrum come from
        """
        self.image_width, self.image_height = image_size
        self.fft_size = fft_size

        self.palette = interpolate_colors([
            (0, 0, 0, 0),
            (58 / 4, 68 / 4, 65 / 4, 255),
            (80 / 2, 100 / 2, 153 / 2, 255),
            (90, 180, 100, 255),
            (224, 224, 44, 255),
            (255, 60, 30, 255),
            (255, 255, 255, 255)
        ])

        # Lookup table: for every image row, the fft bin to read and the
        # weight (scaled to 0..255) of the following bin
        lowest = 100.0
        highest = 22050.0
        log_lowest = math.log10(lowest)
        log_span = math.log10(highest) - log_lowest

        self.y_to_bin = []
        for row in range(self.image_height):
            freq = math.pow(
                10.0,
                log_lowest + row / (self.image_height - 1.0) * log_span)
            position = freq / highest * (self.fft_size / 2 + 1)

            # Rows whose frequency lies beyond the available bins are left out
            if position < self.fft_size / 2:
                whole = int(position)
                self.y_to_bin.append((whole, (position - whole) * 255))

        # Writing pixels one by one through image.load() is a lot slower than
        # image.putdata() followed by a rotation, so the pixels are gathered
        # here and the image is only created when saving
        self.pixels = []

    def draw_spectrum(self, x, spectrum):
        """
        Append one column of pixels for the given spectrum.

        x is accepted but not used; columns are stored in call order.
        """
        palette = self.palette

        # One pixel per mapped row, blending the two neighbouring bins
        self.pixels.extend(
            palette[int((255.0 - weight) * spectrum[fft_bin]
                        + weight * spectrum[fft_bin + 1])]
            for fft_bin, weight in self.y_to_bin)

        # If the FFT is too small to fill up the image, fill with black to the top
        missing = self.image_height - len(self.y_to_bin)
        if missing > 0:
            self.pixels.extend([palette[0]] * missing)

    def save(self, filename, quality=90):
        """
        Build the image from the collected pixels, rotate it and save it.
        """
        canvas = Image.new('RGBA', (self.image_height, self.image_width))
        self.image = canvas
        canvas.putdata(self.pixels)
        rotated = canvas.transpose(Image.ROTATE_90)
        rotated.save(filename, quality=quality)
|
|
||||||
|
|
||||||
|
|
||||||
class AudioProcessor(object):
    """
    The audio processor processes chunks of audio and calculates the spectral centroid and the peak
    samples in that chunk of audio.
    """
    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
        """
        input_filename  -- audio file opened with audiolab.Sndfile
        fft_size        -- number of samples per FFT
        window_function -- callable returning fft_size window coefficients
        """
        max_level = get_max_level(input_filename)

        self.audio_file = audiolab.Sndfile(input_filename, 'r')
        self.fft_size = fft_size
        self.window = window_function(self.fft_size)
        self.spectrum_range = None
        self.lower = 100
        self.higher = 22050
        self.lower_log = math.log10(self.lower)
        self.higher_log = math.log10(self.higher)
        self.clip = lambda val, low, high: min(high, max(low, val))

        # figure out what the maximum value is for an FFT doing the FFT of a DC signal
        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
        max_fft = (numpy.abs(fft)).max()

        # set the scale to normalized audio and normalized FFT
        self.scale = 1.0 / max_level / max_fft if max_level > 0 else 1

    def read(self, start, size, resize_if_less=False):
        """ read size samples starting at start, if resize_if_less is True and less than size
        samples are read, resize the array to size and fill with zeros """

        # number of zeros to add to start and end of the buffer
        add_to_start = 0
        add_to_end = 0
        nframes = self.audio_file.nframes

        if start < 0:
            # the first FFT window starts centered around zero
            if size + start <= 0:
                return numpy.zeros(size) if resize_if_less else numpy.array([])

            self.audio_file.seek(0)

            add_to_start = -start  # remember: start is negative!
            to_read = size + start

            if to_read > nframes:
                add_to_end = to_read - nframes
                to_read = nframes
        else:
            self.audio_file.seek(start)

            to_read = size
            if start + to_read >= nframes:
                to_read = nframes - start
                add_to_end = size - to_read

        try:
            samples = self.audio_file.read_frames(to_read)
        except RuntimeError:
            # this can happen for wave files with broken headers...
            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)

        # convert to mono by selecting left channel only
        if self.audio_file.channels > 1:
            samples = samples[:,0]

        if resize_if_less and (add_to_start > 0 or add_to_end > 0):
            if add_to_start > 0:
                # Both arrays are one-dimensional here, so they are joined
                # along the default (first) axis; axis=1 does not exist.
                samples = numpy.concatenate((numpy.zeros(add_to_start), samples))

            if add_to_end > 0:
                # numpy.resize repeats the data to fill up, so blank the tail afterwards
                samples = numpy.resize(samples, size)
                samples[size - add_to_end:] = 0

        return samples

    def spectral_centroid(self, seek_point, spec_range=110.0):
        """ starting at seek_point read fft_size samples, and calculate the spectral centroid

        Returns (spectral_centroid, db_spectrum), both scaled to 0..1. """

        # Integer division keeps the start offset usable as a sample index
        samples = self.read(seek_point - self.fft_size // 2, self.fft_size, True)

        samples *= self.window
        fft = numpy.fft.rfft(samples)
        spectrum = self.scale * numpy.abs(fft)  # normalized abs(FFT) between 0 and 1

        length = numpy.float64(spectrum.shape[0])

        # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
        db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range

        energy = spectrum.sum()
        spectral_centroid = 0

        if energy > 1e-60:
            # calculate the spectral centroid

            # "is None" is required: comparing a cached ndarray with "=="
            # is evaluated element-wise and has no single truth value
            if self.spectrum_range is None:
                self.spectrum_range = numpy.arange(length)

            spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5

            # clip > log10 > scale between 0 and 1
            spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)

        return (spectral_centroid, db_spectrum)

    def peaks(self, start_seek, end_seek):
        """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
        in that range. Returns that pair in the order they were found. So if min was found first,
        it returns (min, max) else the other way around. """

        # larger blocksizes are faster but take more mem...
        # Aha, Watson, a clue, a tradeoff!
        block_size = 4096

        max_index = -1
        max_value = -1
        min_index = -1
        min_value = 1

        if start_seek < 0:
            start_seek = 0

        if end_seek > self.audio_file.nframes:
            end_seek = self.audio_file.nframes

        if end_seek <= start_seek:
            samples = self.read(start_seek, 1)
            return (samples[0], samples[0])

        if block_size > end_seek - start_seek:
            block_size = end_seek - start_seek

        for i in range(start_seek, end_seek, block_size):
            samples = self.read(i, block_size)

            local_max_index = numpy.argmax(samples)
            local_max_value = samples[local_max_index]

            if local_max_value > max_value:
                max_value = local_max_value
                # Store the position within the whole range, not within the
                # block, so peaks from different blocks can be ordered
                max_index = i + local_max_index

            local_min_index = numpy.argmin(samples)
            local_min_value = samples[local_min_index]

            if local_min_value < min_value:
                min_value = local_min_value
                min_index = i + local_min_index

        return (min_value, max_value) if min_index < max_index else (max_value, min_value)
|
|
||||||
|
|
||||||
|
|
||||||
def create_spectrogram_image(source_filename, output_filename,
                             image_size, fft_size, progress_callback=None):
    """
    Render the spectrogram of source_filename into output_filename.

    image_size is a (width, height) pair and fft_size the number of samples
    per FFT. When given, progress_callback is called with a percentage about
    every tenth of the width and finally with 100.
    """
    processor = AudioProcessor(source_filename, fft_size, numpy.hamming)
    width = image_size[0]
    samples_per_pixel = processor.audio_file.nframes / float(width)

    spectrogram = SpectrogramImage(image_size, fft_size)

    # Report roughly ten times; never let the interval drop to zero for
    # images narrower than ten pixels (that made the modulo divide by zero)
    report_every = max(1, width // 10)

    for x in range(width):
        if progress_callback and x % report_every == 0:
            progress_callback((x * 100) / width)

        seek_point = int(x * samples_per_pixel)

        # Only the dB spectrum is drawn; the centroid is not needed here
        db_spectrum = processor.spectral_centroid(seek_point)[1]

        spectrogram.draw_spectrum(x, db_spectrum)

    if progress_callback:
        progress_callback(100)

    spectrogram.save(output_filename)
|
|
||||||
|
|
||||||
|
|
||||||
def interpolate_colors(colors, flat=False, num_colors=256):
    """
    Build a palette of num_colors entries by linear interpolation between
    the given color stops.

    colors     -- sequence of color tuples; only the first three components
                  (red, green, blue) are used
    flat       -- if True return one flat list [r, g, b, r, g, b, ...],
                  otherwise a list of (r, g, b) tuples
    num_colors -- number of palette entries to generate
    """
    palette = []
    last_stop = len(colors) - 1
    # A single-entry palette has no span to divide by
    span = float(max(num_colors - 1, 1))

    for i in range(num_colors):
        # Position of this entry on the 0..last_stop scale of color stops
        position = i * last_stop / span

        # 'lower' is the stop at or below the position and 'alpha' the
        # fraction of the way towards the next stop (always in [0, 1))
        lower = min(int(position), last_stop)
        alpha = position - lower

        if alpha > 0 and lower < last_stop:
            entry = tuple(
                int((1.0 - alpha) * colors[lower][k] + alpha * colors[lower + 1][k])
                for k in range(3))
        else:
            # Exactly on a stop: use its color unchanged
            entry = tuple(int(colors[lower][k]) for k in range(3))

        if flat:
            palette.extend(entry)
        else:
            palette.append(entry)

    return palette
|
|
||||||
|
|
||||||
|
|
||||||
def get_max_level(filename):
    """
    Return the largest absolute sample value found in the audio file.

    Files with several channels are measured on their first channel only.
    Returns 0 when no samples could be read.
    """
    max_value = 0
    buffer_size = 4096
    audio_file = audiolab.Sndfile(filename, 'r')

    # The file is closed in the finally clause so the handle is released
    # even if an unexpected error interrupts the scan
    try:
        n_samples_left = audio_file.nframes

        while n_samples_left > 0:
            to_read = min(buffer_size, n_samples_left)

            try:
                samples = audio_file.read_frames(to_read)
            except RuntimeError:
                # this can happen with a broken header
                break

            # convert to mono by selecting left channel only
            if audio_file.channels > 1:
                samples = samples[:,0]

            max_value = max(max_value, numpy.abs(samples).max())

            n_samples_left -= to_read
    finally:
        audio_file.close()

    return max_value
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Usage: <script> source output WIDTHxHEIGHT fft_size
    import sys

    # Convert the numeric arguments in place, then forward everything after
    # the script name to create_spectrogram_image
    sys.argv[4] = int(sys.argv[4])
    sys.argv[3] = tuple(int(part) for part in sys.argv[3].split('x'))

    arguments = sys.argv[1:]
    create_spectrogram_image(*arguments)
|
|
@ -43,45 +43,15 @@ gi.require_version('Gst', '1.0')
|
|||||||
from gi.repository import GObject, Gst
|
from gi.repository import GObject, Gst
|
||||||
Gst.init(None)
|
Gst.init(None)
|
||||||
|
|
||||||
|
class Python3AudioThumbnailer(object):
|
||||||
# TODO: Now unused - remove.
|
|
||||||
class Python2AudioThumbnailer(object):
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
_log.info('Initializing {0}'.format(self.__class__.__name__))
|
_log.info('Initializing {0}'.format(self.__class__.__name__))
|
||||||
|
|
||||||
def spectrogram(self, src, dst, **kw):
|
def spectrogram(self, src, dst, **kw):
|
||||||
import numpy
|
from mediagoblin.media_types.audio import audiotospectrogram
|
||||||
# This third-party bundled module is Python 2-only.
|
fft_size = kw.get('fft_size', 1024)
|
||||||
from mediagoblin.media_types.audio import audioprocessing
|
|
||||||
|
|
||||||
width = kw['width']
|
|
||||||
height = int(kw.get('height', float(width) * 0.3))
|
|
||||||
fft_size = kw.get('fft_size', 2048)
|
|
||||||
callback = kw.get('progress_callback')
|
callback = kw.get('progress_callback')
|
||||||
processor = audioprocessing.AudioProcessor(
|
audiotospectrogram.drawSpectrogram(src, dst, fftSize = fft_size, progressCallback = callback)
|
||||||
src,
|
|
||||||
fft_size,
|
|
||||||
numpy.hanning)
|
|
||||||
|
|
||||||
samples_per_pixel = processor.audio_file.nframes / float(width)
|
|
||||||
|
|
||||||
spectrogram = audioprocessing.SpectrogramImage(width, height, fft_size)
|
|
||||||
|
|
||||||
for x in range(width):
|
|
||||||
if callback and x % (width / 10) == 0:
|
|
||||||
callback((x * 100) / width)
|
|
||||||
|
|
||||||
seek_point = int(x * samples_per_pixel)
|
|
||||||
|
|
||||||
(spectral_centroid, db_spectrum) = processor.spectral_centroid(
|
|
||||||
seek_point)
|
|
||||||
|
|
||||||
spectrogram.draw_spectrum(x, db_spectrum)
|
|
||||||
|
|
||||||
if callback:
|
|
||||||
callback(100)
|
|
||||||
|
|
||||||
spectrogram.save(dst)
|
|
||||||
|
|
||||||
def thumbnail_spectrogram(self, src, dst, thumb_size):
|
def thumbnail_spectrogram(self, src, dst, thumb_size):
|
||||||
'''
|
'''
|
||||||
@ -111,31 +81,7 @@ class Python2AudioThumbnailer(object):
|
|||||||
|
|
||||||
th.save(dst)
|
th.save(dst)
|
||||||
|
|
||||||
|
AudioThumbnailer = Python3AudioThumbnailer
|
||||||
class DummyAudioThumbnailer(Python2AudioThumbnailer):
|
|
||||||
"""A thumbnailer that just outputs a stock image.
|
|
||||||
|
|
||||||
The Python package used for audio spectrograms, "scikits.audiolab", does not
|
|
||||||
support Python 3 and is a constant source of problems for people installing
|
|
||||||
MediaGoblin. Until the feature is rewritten, this thumbnailer class simply
|
|
||||||
provides a generic image.
|
|
||||||
|
|
||||||
TODO: Consider Python 3 compatible interfaces to libsndfile, such as
|
|
||||||
https://pypi.python.org/pypi/PySoundFile/0.9.0.post1 as discussed here
|
|
||||||
https://issues.mediagoblin.org/ticket/5467#comment:6
|
|
||||||
|
|
||||||
"""
|
|
||||||
def spectrogram(self, src, dst, **kw):
|
|
||||||
# Using PIL here in case someone wants to swap out the image for a PNG.
|
|
||||||
# This will convert to JPEG, where simply copying the file won't.
|
|
||||||
img = Image.open('mediagoblin/static/images/media_thumbs/video.jpg')
|
|
||||||
img.save(dst)
|
|
||||||
|
|
||||||
|
|
||||||
# Due to recurring problems with spectrograms under Python 2, and the fact we're
|
|
||||||
# soon dropping Python 2 support, we're disabling spectrogram thumbnails. See #5594.
|
|
||||||
AudioThumbnailer = DummyAudioThumbnailer
|
|
||||||
|
|
||||||
|
|
||||||
class AudioTranscoder(object):
|
class AudioTranscoder(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -25,7 +25,6 @@ import imghdr
|
|||||||
#os.environ['GST_DEBUG'] = '4,python:4'
|
#os.environ['GST_DEBUG'] = '4,python:4'
|
||||||
|
|
||||||
pytest.importorskip("gi.repository.Gst")
|
pytest.importorskip("gi.repository.Gst")
|
||||||
pytest.importorskip("scikits.audiolab")
|
|
||||||
import gi
|
import gi
|
||||||
gi.require_version('Gst', '1.0')
|
gi.require_version('Gst', '1.0')
|
||||||
from gi.repository import Gst
|
from gi.repository import Gst
|
||||||
|
2
setup.py
2
setup.py
@ -72,6 +72,8 @@ install_requires = [
|
|||||||
'PyLD<2.0.0', # Breaks a Python 3 test if >= 2.0.0.
|
'PyLD<2.0.0', # Breaks a Python 3 test if >= 2.0.0.
|
||||||
'ExifRead>=2.0.0',
|
'ExifRead>=2.0.0',
|
||||||
'email-validator', # Seems that WTForms must have dropped this.
|
'email-validator', # Seems that WTForms must have dropped this.
|
||||||
|
'soundfile<=0.10.999' # Tested with 0.10.3.post1
|
||||||
|
|
||||||
# This is optional:
|
# This is optional:
|
||||||
# 'translitcodec',
|
# 'translitcodec',
|
||||||
# For now we're expecting that users will install this from
|
# For now we're expecting that users will install this from
|
||||||
|
Loading…
x
Reference in New Issue
Block a user