Audio thumbnailing & spectrograms, media plugins use sniffing
* Added extlib/freesound/audioprocessing.py * config_spec * Added create_spectrogram setting * Added media:medium and media:thumb max_{width,height} settings * Added sniffing logic to - audio.processing:sniff_handler - video.processing:sniff_handler * Changed audio.processing:sniff_handler logic * Added audio thumbnailing functionality to audio.processing (works only with create_spectrogram enabled) * Refactored contexts in audio.processing * Added audio.transcoders:AudioThumbnailer Used for creating spectrograms and spectrogram thumbnails - Wadsworth's Constant, we meet again :) * audio.transcoders:AudioTranscoder - Added mux_string kwarg - Delete self.pipeline on self.halt() * Changed str.format formatting in image.processing:sniff_handler Had {1} without an {0}, changed to {0} * Refactored VideoTranscoder to use transcode() for transcoding instead of __init__() * Added discover() method to video.transcoders:VideoTranscoder * Added spectrogram display to media_displays/audio.html * Updated test_submission to reflect changes in media plugin delegation
This commit is contained in:
parent
9f46a79dde
commit
10085b7739
616
extlib/freesound/audioprocessing.py
Normal file
616
extlib/freesound/audioprocessing.py
Normal file
@ -0,0 +1,616 @@
|
||||
#!/usr/bin/env python
|
||||
# processing.py -- various audio processing functions
|
||||
# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
|
||||
# UNIVERSITAT POMPEU FABRA
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# Authors:
|
||||
# Bram de Jong <bram.dejong at domain.com where domain in gmail>
|
||||
# 2012, Joar Wandborg <first name at last name dot se>
|
||||
|
||||
from PIL import Image, ImageDraw, ImageColor #@UnresolvedImport
|
||||
from functools import partial
|
||||
import math
|
||||
import numpy
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
|
||||
|
||||
def get_sound_type(input_filename):
    """Return the lower-cased extension of *input_filename* without its dot.

    The short forms "fla" and "aif" are expanded to "flac" and "aiff"; any
    other extension (including an empty one) is returned as found.
    """
    aliases = {"fla": "flac", "aif": "aiff"}
    extension = os.path.splitext(input_filename.lower())[1].strip(".")
    return aliases.get(extension, extension)
|
||||
|
||||
|
||||
try:
    import scikits.audiolab as audiolab
except ImportError:
    # audiolab stays optional at import time; the functions in this module
    # that call audiolab.Sndfile will fail when used without it.
    # print() with a single argument prints the same text on Python 2 and 3.
    print("WARNING: audiolab is not installed so wav2png will not work")
|
||||
import subprocess
|
||||
|
||||
class AudioProcessingException(Exception):
    """Error raised by this module when an audio processing step fails."""
|
||||
|
||||
class TestAudioFile(object):
    """A class that mimics audiolab.sndfile but generates noise instead of
    reading a wave file.

    It can additionally be told to have a "broken" header, in which case
    reading past the middle of the file raises RuntimeError. Also useful for
    testing ultra-short files of 20 samples.
    """
    def __init__(self, num_frames, has_broken_header=False):
        """
        num_frames -- total number of frames the fake file pretends to hold
        has_broken_header -- when True, read_frames() raises RuntimeError as
            soon as a read would go past half of the file
        """
        self.seekpoint = 0
        self.nframes = num_frames
        self.samplerate = 44100
        self.channels = 1
        self.has_broken_header = has_broken_header

    def seek(self, seekpoint):
        """ move the read position to seekpoint """
        self.seekpoint = seekpoint

    def read_frames(self, frames_to_read):
        """ return up to frames_to_read random samples in [-1, 1) and advance
        the read position; fewer samples are returned near the end """
        # the frame count is stored as self.nframes (the attribute name the
        # rest of this module reads from audio files)
        if self.has_broken_header and self.seekpoint + frames_to_read > self.nframes / 2:
            raise RuntimeError()

        num_frames_left = self.nframes - self.seekpoint
        will_read = min(num_frames_left, frames_to_read)
        self.seekpoint += will_read
        return numpy.random.random(will_read) * 2 - 1
|
||||
|
||||
|
||||
def get_max_level(filename):
    """
    Return the largest absolute sample value in the audio file *filename*.

    The file is opened with audiolab.Sndfile and scanned in chunks of 4096
    frames; for multi-channel files only the first channel is inspected.
    Scanning stops early (keeping the maximum found so far) if reading raises
    RuntimeError. Returns 0 for a file without frames.
    """
    max_value = 0
    buffer_size = 4096
    audio_file = audiolab.Sndfile(filename, 'r')

    # make sure the handle is released even if something unexpected is raised
    # while scanning
    try:
        n_samples_left = audio_file.nframes

        while n_samples_left:
            to_read = min(buffer_size, n_samples_left)

            try:
                samples = audio_file.read_frames(to_read)
            except RuntimeError:
                # this can happen with a broken header
                break

            # convert to mono by selecting left channel only
            if audio_file.channels > 1:
                samples = samples[:, 0]

            max_value = max(max_value, numpy.abs(samples).max())

            n_samples_left -= to_read
    finally:
        audio_file.close()

    return max_value
|
||||
|
||||
class AudioProcessor(object):
    """
    The audio processor processes chunks of audio and calculates the spectral
    centroid and the peak samples in that chunk of audio.
    """
    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
        """
        input_filename -- audio file, opened through audiolab.Sndfile
        fft_size -- number of samples per FFT window
        window_function -- callable returning a window of fft_size samples
        """
        max_level = get_max_level(input_filename)

        self.audio_file = audiolab.Sndfile(input_filename, 'r')
        self.fft_size = fft_size
        self.window = window_function(self.fft_size)
        self.spectrum_range = None
        self.lower = 100
        self.higher = 22050
        self.lower_log = math.log10(self.lower)
        self.higher_log = math.log10(self.higher)
        self.clip = lambda val, low, high: min(high, max(low, val))

        # figure out what the maximum value is for an FFT doing the FFT of a DC signal
        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
        max_fft = (numpy.abs(fft)).max()
        # set the scale to normalized audio and normalized FFT
        self.scale = 1.0 / max_level / max_fft if max_level > 0 else 1

    def read(self, start, size, resize_if_less=False):
        """ read size samples starting at start, if resize_if_less is True and less than size
        samples are read, resize the array to size and fill with zeros """

        # number of zeros to add to start and end of the buffer
        add_to_start = 0
        add_to_end = 0

        if start < 0:
            # the first FFT window starts centered around zero
            if size + start <= 0:
                return numpy.zeros(size) if resize_if_less else numpy.array([])
            else:
                self.audio_file.seek(0)

                add_to_start = -start  # remember: start is negative!
                to_read = size + start

                if to_read > self.audio_file.nframes:
                    add_to_end = to_read - self.audio_file.nframes
                    to_read = self.audio_file.nframes
        else:
            self.audio_file.seek(start)

            to_read = size
            if start + to_read >= self.audio_file.nframes:
                to_read = self.audio_file.nframes - start
                add_to_end = size - to_read

        try:
            samples = self.audio_file.read_frames(to_read)
        except RuntimeError:
            # this can happen for wave files with broken headers...
            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)

        # convert to mono by selecting left channel only
        if self.audio_file.channels > 1:
            samples = samples[:, 0]

        if resize_if_less and (add_to_start > 0 or add_to_end > 0):
            if add_to_start > 0:
                # samples is one-dimensional here, so join along the only axis
                samples = numpy.concatenate((numpy.zeros(add_to_start), samples))

            if add_to_end > 0:
                # numpy.resize pads by repeating the data, so blank the tail
                samples = numpy.resize(samples, size)
                samples[size - add_to_end:] = 0

        return samples

    def spectral_centroid(self, seek_point, spec_range=110.0):
        """ starting at seek_point read fft_size samples, and calculate the spectral centroid

        Returns (spectral_centroid, db_spectrum): the centroid mapped onto
        [0, 1] on a log scale between self.lower and self.higher, and the
        magnitude spectrum in dB mapped from [-spec_range, 0] onto [0, 1]. """

        # floor division keeps the window start an integer sample index
        samples = self.read(seek_point - self.fft_size // 2, self.fft_size, True)

        samples *= self.window
        fft = numpy.fft.rfft(samples)
        spectrum = self.scale * numpy.abs(fft)  # normalized abs(FFT) between 0 and 1
        length = numpy.float64(spectrum.shape[0])

        # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
        db_spectrum = ((20 * (numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range) / spec_range

        energy = spectrum.sum()
        spectral_centroid = 0

        if energy > 1e-60:
            # calculate the spectral centroid

            # identity test: once cached this is an array, and comparing an
            # array with "==" is an elementwise operation
            if self.spectrum_range is None:
                self.spectrum_range = numpy.arange(length)

            spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5

            # clip > log10 > scale between 0 and 1
            spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)

        return (spectral_centroid, db_spectrum)

    def peaks(self, start_seek, end_seek):
        """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
        in that range. Returns that pair in the order they were found. So if min was found first,
        it returns (min, max) else the other way around. """

        # larger blocksizes are faster but take more mem...
        # Aha, Watson, a clue, a tradeoff!
        block_size = 4096

        max_index = -1
        max_value = -1
        min_index = -1
        min_value = 1

        if start_seek < 0:
            start_seek = 0

        if end_seek > self.audio_file.nframes:
            end_seek = self.audio_file.nframes

        if end_seek <= start_seek:
            samples = self.read(start_seek, 1)
            if len(samples) == 0:
                # nothing could be read (e.g. a file without frames)
                return (0, 0)
            return (samples[0], samples[0])

        if block_size > end_seek - start_seek:
            block_size = end_seek - start_seek

        for i in range(start_seek, end_seek, block_size):
            samples = self.read(i, block_size)

            local_max_index = numpy.argmax(samples)
            local_max_value = samples[local_max_index]

            if local_max_value > max_value:
                max_value = local_max_value
                # store the absolute position: indices local to a block are
                # not comparable between different blocks
                max_index = i + local_max_index

            local_min_index = numpy.argmin(samples)
            local_min_value = samples[local_min_index]

            if local_min_value < min_value:
                min_value = local_min_value
                min_index = i + local_min_index

        return (min_value, max_value) if min_index < max_index else (max_value, min_value)
|
||||
|
||||
|
||||
def interpolate_colors(colors, flat=False, num_colors=256):
    """ given a list of colors, create a larger list of colors by linearly
    interpolating between neighbouring entries.

    colors -- sequence of colors; only the first three components of each
        entry are used
    flat -- if True a flat list of numbers (r, g, b, r, g, b, ...) is
        returned, if False a list of (r, g, b) tuples
    num_colors -- the number of colors wanted in the final list

    Components are truncated to int. """

    palette = []
    last_color = len(colors) - 1
    # with a single requested color there is no span to divide by; the
    # result is then just the first color
    steps = float(num_colors - 1) if num_colors > 1 else 1.0

    for i in range(num_colors):
        index = (i * last_color) / steps
        index_int = int(index)
        alpha = index - float(index_int)

        low = colors[index_int]
        if alpha > 0:
            high = colors[index_int + 1]
            r = (1.0 - alpha) * low[0] + alpha * high[0]
            g = (1.0 - alpha) * low[1] + alpha * high[1]
            b = (1.0 - alpha) * low[2] + alpha * high[2]
        else:
            # exactly on a source color (this also avoids reading past the
            # last entry)
            r, g, b = low[0], low[1], low[2]

        if flat:
            palette.extend((int(r), int(g), int(b)))
        else:
            palette.append((int(r), int(g), int(b)))

    return palette
|
||||
|
||||
|
||||
def desaturate(rgb, amount):
    """
    Pull every component of rgb towards the mean of its components.

    amount == 0 leaves the color unchanged, amount == 1 yields grey.
    Returns a tuple of ints (truncated).
    """
    luminosity = sum(rgb) / 3.0
    return tuple(int(channel - amount * (channel - luminosity)) for channel in rgb)
|
||||
|
||||
|
||||
class WaveformImage(object):
    """
    Given peaks and spectral centroids from the AudioProcessor, this class will construct
    a waveform image which can be saved as PNG.
    """
    def __init__(self, image_width, image_height, palette=1):
        """
        image_width, image_height -- size of the image in pixels; the height
            must be odd
        palette -- 1..4, selects the color set and the background color

        Raises AudioProcessingException for an even height or an unknown
        palette number.
        """
        if image_height % 2 == 0:
            raise AudioProcessingException("Height should be uneven: images look much better at uneven height")

        if palette == 1:
            background_color = (0, 0, 0)
            colors = [
                (50, 0, 200),
                (0, 220, 80),
                (255, 224, 0),
                (255, 70, 0),
            ]
        elif palette == 2:
            background_color = (0, 0, 0)
            colors = [self.color_from_value(value / 29.0) for value in range(0, 30)]
        elif palette == 3:
            background_color = (213, 217, 221)
            # a real list (not a lazy map object) because interpolate_colors
            # takes len() of it and indexes into it
            colors = [desaturate(color, amount=0.7) for color in [
                (50, 0, 200),
                (0, 220, 80),
                (255, 224, 0),
            ]]
        elif palette == 4:
            background_color = (213, 217, 221)
            colors = [desaturate(self.color_from_value(value / 29.0), amount=0.8) for value in range(0, 30)]
        else:
            # without this branch an unknown palette surfaced as an
            # unbound-variable error a few lines further down
            raise AudioProcessingException("unknown palette %r, expected 1, 2, 3 or 4" % (palette,))

        self.image = Image.new("RGB", (image_width, image_height), background_color)

        self.image_width = image_width
        self.image_height = image_height

        self.draw = ImageDraw.Draw(self.image)
        self.previous_x, self.previous_y = None, None

        self.color_lookup = interpolate_colors(colors)
        self.pix = self.image.load()

    def color_from_value(self, value):
        """ given a value between 0 and 1, return an (r,g,b) tuple """

        return ImageColor.getrgb("hsl(%d,%d%%,%d%%)" % (int((1.0 - value) * 360), 80, 50))

    def draw_peaks(self, x, peaks, spectral_centroid):
        """ draw 2 peaks at x using the spectral_centroid for color """

        # map a sample value onto a y coordinate around the vertical middle,
        # leaving 2 pixels free at the top and at the bottom
        y1 = self.image_height * 0.5 - peaks[0] * (self.image_height - 4) * 0.5
        y2 = self.image_height * 0.5 - peaks[1] * (self.image_height - 4) * 0.5

        line_color = self.color_lookup[int(spectral_centroid * 255.0)]

        if self.previous_y is not None:
            # connect to where the previous column ended
            self.draw.line([self.previous_x, self.previous_y, x, y1, x, y2], line_color)
        else:
            self.draw.line([x, y1, x, y2], line_color)

        self.previous_x, self.previous_y = x, y2

        self.draw_anti_aliased_pixels(x, y1, y2, line_color)

    def draw_anti_aliased_pixels(self, x, y1, y2, color):
        """ vertical anti-aliasing at y1 and y2 """

        # blend the pixel just below the lower end of the line, weighted by
        # the fractional part of that end's y coordinate
        y_max = max(y1, y2)
        y_max_int = int(y_max)
        alpha = y_max - y_max_int

        if alpha > 0.0 and alpha < 1.0 and y_max_int + 1 < self.image_height:
            current_pix = self.pix[x, y_max_int + 1]

            r = int((1 - alpha) * current_pix[0] + alpha * color[0])
            g = int((1 - alpha) * current_pix[1] + alpha * color[1])
            b = int((1 - alpha) * current_pix[2] + alpha * color[2])

            self.pix[x, y_max_int + 1] = (r, g, b)

        # same for the pixel just above the upper end
        y_min = min(y1, y2)
        y_min_int = int(y_min)
        alpha = 1.0 - (y_min - y_min_int)

        if alpha > 0.0 and alpha < 1.0 and y_min_int - 1 >= 0:
            current_pix = self.pix[x, y_min_int - 1]

            r = int((1 - alpha) * current_pix[0] + alpha * color[0])
            g = int((1 - alpha) * current_pix[1] + alpha * color[1])
            b = int((1 - alpha) * current_pix[2] + alpha * color[2])

            self.pix[x, y_min_int - 1] = (r, g, b)

    def save(self, filename):
        """ brighten the middle row to mark the zero line, then write the
        image to filename """
        # draw a "zero" line
        a = 25
        # floor division: the row is used as a pixel index and must be an int
        middle = self.image_height // 2
        for x in range(self.image_width):
            self.pix[x, middle] = tuple(p + a for p in self.pix[x, middle])

        self.image.save(filename)
|
||||
|
||||
|
||||
class SpectrogramImage(object):
    """
    Given spectra from the AudioProcessor, this class will construct a spectrogram image which
    can be saved as JPEG.
    """
    def __init__(self, image_width, image_height, fft_size):
        """
        image_width, image_height -- size of the final image in pixels
        fft_size -- FFT window size used to produce the spectra that will be
            passed to draw_spectrum()
        """
        self.image_width = image_width
        self.image_height = image_height
        self.fft_size = fft_size

        # width and height are swapped on purpose: pixels are collected
        # column by column and the image is rotated when it is saved
        self.image = Image.new("RGBA", (image_height, image_width))

        colors = [
            (0, 0, 0, 0),
            (58 // 4, 68 // 4, 65 // 4, 255),
            (80 // 2, 100 // 2, 153 // 2, 255),
            (90, 180, 100, 255),
            (224, 224, 44, 255),
            (255, 60, 30, 255),
            (255, 255, 255, 255)
        ]
        self.palette = interpolate_colors(colors)

        # generate the lookup which translates y-coordinate to fft-bin
        self.y_to_bin = []
        f_min = 100.0
        f_max = 22050.0
        y_min = math.log10(f_min)
        y_max = math.log10(f_max)
        half_fft = self.fft_size // 2
        for y in range(self.image_height):
            # rows are spaced logarithmically between f_min and f_max
            freq = math.pow(10.0, y_min + y / (image_height - 1.0) * (y_max - y_min))
            fft_bin = freq / 22050.0 * (half_fft + 1)

            if fft_bin < half_fft:
                # fractional part, pre-scaled to the 0..255 palette range
                alpha = fft_bin - int(fft_bin)

                self.y_to_bin.append((int(fft_bin), alpha * 255))

        # this is a bit strange, but using image.load()[x,y] = ... is
        # a lot slower than using image.putdata and then rotating the image
        # so we store all the pixels in an array and then create the image when saving
        self.pixels = []

    def draw_spectrum(self, x, spectrum):
        """ append one image column for spectrum (values are expected
        between 0 and 1); x is not used, columns are stored in call order """
        # for all frequencies, draw the pixels
        for (index, alpha) in self.y_to_bin:
            # blend the two neighbouring bins; the weights add up to 255 so
            # the result is directly a palette index
            self.pixels.append(self.palette[int((255.0 - alpha) * spectrum[index] + alpha * spectrum[index + 1])])

        # if the FFT is too small to fill up the image, fill with black to the top
        for y in range(len(self.y_to_bin), self.image_height):  #@UnusedVariable
            self.pixels.append(self.palette[0])

    def save(self, filename, quality=80):
        """ write the collected columns to filename, which must end in .jpg """
        assert filename.lower().endswith(".jpg")
        self.image.putdata(self.pixels)
        # JPEG has no alpha channel; convert explicitly because current
        # Pillow refuses to write an RGBA image as JPEG
        self.image.transpose(Image.ROTATE_90).convert("RGB").save(filename, quality=quality)
|
||||
|
||||
|
||||
def create_wave_images(input_filename, output_filename_w, output_filename_s, image_width, image_height, fft_size, progress_callback=None):
    """
    Utility function for creating both wavefile and spectrum images from an audio input file.

    input_filename -- audio file to analyse
    output_filename_w -- where the waveform image is written
    output_filename_s -- where the spectrogram image is written
    image_width, image_height -- size of both images in pixels
    fft_size -- FFT window size in samples
    progress_callback -- optional callable; it receives an integer
        percentage roughly every tenth of the width and 100 at the end
    """
    processor = AudioProcessor(input_filename, fft_size, numpy.hanning)
    samples_per_pixel = processor.audio_file.nframes / float(image_width)

    waveform = WaveformImage(image_width, image_height)
    spectrogram = SpectrogramImage(image_width, image_height, fft_size)

    # at least 1, so images narrower than 10 pixels do not divide by zero
    progress_step = max(1, image_width // 10)

    for x in range(image_width):

        if progress_callback and x % progress_step == 0:
            progress_callback((x * 100) // image_width)

        seek_point = int(x * samples_per_pixel)
        next_seek_point = int((x + 1) * samples_per_pixel)

        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
        peaks = processor.peaks(seek_point, next_seek_point)

        waveform.draw_peaks(x, peaks, spectral_centroid)
        spectrogram.draw_spectrum(x, db_spectrum)

    if progress_callback:
        progress_callback(100)

    waveform.save(output_filename_w)
    spectrogram.save(output_filename_s)
|
||||
|
||||
|
||||
class NoSpaceLeftException(Exception):
    """Raised when a helper program reports "No space left on device"."""
|
||||
|
||||
def convert_to_pcm(input_filename, output_filename):
    """
    converts any audio file type to pcm audio

    The decoder is picked from the file extension: lame for mp3, oggdec for
    ogg and flac for flac. Returns True after a successful conversion and
    False, without doing anything, for every other extension.

    Raises AudioProcessingException if the input file does not exist or the
    decoder fails, and NoSpaceLeftException if the decoder output mentions
    "No space left on device".
    """

    # exceptions are raised in call form, which is equivalent to the old
    # "raise Class, message" statement and also valid on Python 3
    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    sound_type = get_sound_type(input_filename)

    if sound_type == "mp3":
        cmd = ["lame", "--decode", input_filename, output_filename]
    elif sound_type == "ogg":
        cmd = ["oggdec", input_filename, "-o", output_filename]
    elif sound_type == "flac":
        cmd = ["flac", "-f", "-d", "-s", "-o", output_filename, input_filename]
    else:
        return False

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        if "No space left on device" in stderr + " " + stdout:
            raise NoSpaceLeftException
        raise AudioProcessingException("failed converting to pcm data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)

    return True
|
||||
|
||||
|
||||
def stereofy_and_find_info(stereofy_executble_path, input_filename, output_filename):
    """
    converts a pcm wave file to two channel, 16 bit integer

    The conversion is done by the external program at
    stereofy_executble_path. Its output is scanned for "#duration",
    "#channels", "#samplerate" and "#bitdepth" entries.

    Returns a dict with the keys duration, channels, samplerate, bitrate and
    bitdepth. Parsed values are floats; entries that were not reported stay
    0 (bitdepth: None). bitrate is the input file size in bits divided by
    1024 and by the duration, or 0 when no duration was reported.

    Raises AudioProcessingException if the input file does not exist or the
    program fails, and NoSpaceLeftException if its output mentions
    "No space left on device".
    """

    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    cmd = [stereofy_executble_path, "--input", input_filename, "--output", output_filename]

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        if "No space left on device" in stderr + " " + stdout:
            raise NoSpaceLeftException
        raise AudioProcessingException("failed calling stereofy data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)

    # search both streams as one line
    stdout = (stdout + " " + stderr).replace("\n", " ")

    def reported(pattern, name, default):
        # value of the named group as a float, or default if it is missing
        m = re.match(pattern, stdout)
        return float(m.group(name)) if m is not None else default

    duration = reported(r".*#duration (?P<duration>[\d\.]+).*", "duration", 0)
    channels = reported(r".*#channels (?P<channels>\d+).*", "channels", 0)
    samplerate = reported(r".*#samplerate (?P<samplerate>\d+).*", "samplerate", 0)
    bitdepth = reported(r".*#bitdepth (?P<bitdepth>\d+).*", "bitdepth", None)

    bitrate = (os.path.getsize(input_filename) * 8.0) / 1024.0 / duration if duration > 0 else 0

    return dict(duration=duration, channels=channels, samplerate=samplerate, bitrate=bitrate, bitdepth=bitdepth)
|
||||
|
||||
|
||||
def convert_to_mp3(input_filename, output_filename, quality=70):
    """
    converts the incoming wave file to a mp3 file

    Runs "lame --silent --abr <quality>"; quality is handed to lame's --abr
    option unchanged.

    Raises AudioProcessingException if the input file does not exist, lame
    exits with a non-zero status or the output file is missing afterwards.
    """

    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["lame", "--silent", "--abr", str(quality), input_filename, output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        # report the command and both streams; stderr used to be captured
        # but dropped, which left the message without the actual error
        raise AudioProcessingException("failed converting to mp3:\n" + " ".join(command) + "\n" + stderr + "\n" + stdout)
|
||||
|
||||
def convert_to_ogg(input_filename, output_filename, quality=1):
    """
    converts the incoming wave file to an ogg file

    Runs "oggenc -q <quality>"; quality is handed to oggenc's -q option
    unchanged.

    Raises AudioProcessingException if the input file does not exist, oggenc
    exits with a non-zero status or the output file is missing afterwards.
    """

    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["oggenc", "-q", str(quality), input_filename, "-o", output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdout, stderr) = process.communicate()

    if process.returncode != 0 or not os.path.exists(output_filename):
        # report the command and both streams; stderr used to be captured
        # but dropped, which left the message without the actual error
        raise AudioProcessingException("failed converting to ogg:\n" + " ".join(command) + "\n" + stderr + "\n" + stdout)
|
||||
|
||||
def convert_using_ffmpeg(input_filename, output_filename):
    """
    converts the incoming audio file to pcm using ffmpeg

    ffmpeg is run with "-ac 1 -acodec pcm_s16le -ar 44100", i.e. one channel
    of signed 16 bit little-endian PCM at 44100 Hz, overwriting
    output_filename if it exists (-y).

    The call is limited to three minutes with SIGALRM, so it only works in
    the main thread on platforms that provide that signal.

    Raises AudioProcessingException if the input file does not exist, ffmpeg
    times out, exits with a non-zero status or leaves no output file.
    """
    TIMEOUT = 3 * 60

    def alarm_handler(signum, frame):
        raise AudioProcessingException("timeout while waiting for ffmpeg")

    if not os.path.exists(input_filename):
        raise AudioProcessingException("file %s does not exist" % input_filename)

    command = ["ffmpeg", "-y", "-i", input_filename, "-ac", "1", "-acodec", "pcm_s16le", "-ar", "44100", output_filename]

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    previous_handler = signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(TIMEOUT)
    try:
        (stdout, stderr) = process.communicate()
    except AudioProcessingException:
        # timed out: do not leave ffmpeg running in the background
        process.kill()
        process.wait()
        raise
    finally:
        # always cancel the pending alarm and put the old handler back
        signal.alarm(0)
        # None means the old handler was not installed from Python and
        # cannot be passed back to signal.signal()
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)

    if process.returncode != 0 or not os.path.exists(output_filename):
        # report the command and both streams; stderr used to be captured
        # but dropped
        raise AudioProcessingException("failed converting using ffmpeg:\n" + " ".join(command) + "\n" + stderr + "\n" + stdout)
|
@ -65,6 +65,14 @@ base_url = string(default="/mgoblin_media/")
|
||||
storage_class = string(default="mediagoblin.storage.filestorage:BasicFileStorage")
|
||||
base_dir = string(default="%(here)s/user_dev/media/queue")
|
||||
|
||||
[media:medium]
|
||||
max_width = integer(default=640)
|
||||
max_height = integer(default=640)
|
||||
|
||||
[media:thumb]
|
||||
max_width = integer(default=180)
|
||||
max_height = integer(default=180)
|
||||
|
||||
[media_type:mediagoblin.media_types.video]
|
||||
# Should we keep the original file?
|
||||
keep_original = boolean(default=False)
|
||||
@ -72,6 +80,7 @@ keep_original = boolean(default=False)
|
||||
[media_type:mediagoblin.media_types.audio]
|
||||
# vorbisenc quality
|
||||
quality = float(default=0.3)
|
||||
create_spectrogram = boolean(default=False)
|
||||
|
||||
|
||||
[beaker.cache]
|
||||
|
@ -24,7 +24,16 @@ from mediagoblin.media_types.ascii import asciitoimage
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo']


def sniff_handler(media_file, **kw):
    """
    Decide from the file name whether this media type handles the upload.

    Returns True if the keyword argument 'media' is given and the extension
    of its ``filename`` attribute (compared case-insensitively) is listed in
    SUPPORTED_EXTENSIONS, otherwise False. media_file itself is not
    inspected.
    """
    media = kw.get('media')

    if media is not None:
        ext = os.path.splitext(media.filename)[1]
        clean_ext = ext[1:].lower()

        if clean_ext in SUPPORTED_EXTENSIONS:
            return True

    return False
|
||||
|
||||
def process_ascii(entry):
|
||||
|
1
mediagoblin/media_types/audio/audioprocessing.py
Symbolic link
1
mediagoblin/media_types/audio/audioprocessing.py
Symbolic link
@ -0,0 +1 @@
|
||||
../../../extlib/freesound/audioprocessing.py
|
@ -21,9 +21,10 @@ import os
|
||||
from mediagoblin import mg_globals as mgg
|
||||
from mediagoblin.processing import create_pub_filepath
|
||||
|
||||
from mediagoblin.media_types.audio.transcoders import AudioTranscoder
|
||||
from mediagoblin.media_types.audio.transcoders import AudioTranscoder, \
|
||||
AudioThumbnailer
|
||||
|
||||
_log = logging.getLogger()
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
def sniff_handler(media_file, **kw):
|
||||
transcoder = AudioTranscoder()
|
||||
@ -33,7 +34,9 @@ def sniff_handler(media_file, **kw):
|
||||
if data.is_audio == True and data.is_video == False:
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
pass
|
||||
|
||||
return False
|
||||
|
||||
def process_audio(entry):
|
||||
audio_config = mgg.global_config['media_type:mediagoblin.media_types.audio']
|
||||
@ -51,10 +54,9 @@ def process_audio(entry):
|
||||
original=os.path.splitext(
|
||||
queued_filepath[-1])[0]))
|
||||
|
||||
ogg_tmp = tempfile.NamedTemporaryFile()
|
||||
transcoder = AudioTranscoder()
|
||||
|
||||
with ogg_tmp:
|
||||
transcoder = AudioTranscoder()
|
||||
with tempfile.NamedTemporaryFile() as ogg_tmp:
|
||||
|
||||
transcoder.transcode(
|
||||
queued_filename,
|
||||
@ -72,11 +74,54 @@ def process_audio(entry):
|
||||
entry.media_data['audio'] = {
|
||||
u'length': int(data.audiolength)}
|
||||
|
||||
thumbnail_tmp = tempfile.NamedTemporaryFile()
|
||||
if audio_config['create_spectrogram']:
|
||||
spectrogram_filepath = create_pub_filepath(
|
||||
entry,
|
||||
'{original}-spectrogram.jpg'.format(
|
||||
original=os.path.splitext(
|
||||
queued_filepath[-1])[0]))
|
||||
|
||||
with thumbnail_tmp:
|
||||
with tempfile.NamedTemporaryFile(suffix='.wav') as wav_tmp:
|
||||
_log.info('Creating WAV source for spectrogram')
|
||||
transcoder.transcode(
|
||||
queued_filename,
|
||||
wav_tmp.name,
|
||||
mux_string='wavenc')
|
||||
|
||||
thumbnailer = AudioThumbnailer()
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix='.jpg') as spectrogram_tmp:
|
||||
thumbnailer.spectrogram(
|
||||
wav_tmp.name,
|
||||
spectrogram_tmp.name,
|
||||
width=mgg.global_config['media:medium']['max_width'])
|
||||
|
||||
_log.debug('Saving spectrogram...')
|
||||
mgg.public_store.get_file(spectrogram_filepath, 'wb').write(
|
||||
spectrogram_tmp.read())
|
||||
|
||||
entry.media_files['spectrogram'] = spectrogram_filepath
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix='.jpg') as thumb_tmp:
|
||||
thumbnailer.thumbnail_spectrogram(
|
||||
spectrogram_tmp.name,
|
||||
thumb_tmp.name,
|
||||
(mgg.global_config['media:thumb']['max_width'],
|
||||
mgg.global_config['media:thumb']['max_height']))
|
||||
|
||||
thumb_filepath = create_pub_filepath(
|
||||
entry,
|
||||
'{original}-thumbnail.jpg'.format(
|
||||
original=os.path.splitext(
|
||||
queued_filepath[-1])[0]))
|
||||
|
||||
mgg.public_store.get_file(thumb_filepath, 'wb').write(
|
||||
thumb_tmp.read())
|
||||
|
||||
entry.media_files['thumb'] = thumb_filepath
|
||||
else:
|
||||
entry.media_files['thumb'] = ['fake', 'thumb', 'path.jpg']
|
||||
|
||||
|
||||
mgg.queue_store.delete_file(queued_filepath)
|
||||
|
||||
entry.save()
|
||||
|
@ -16,8 +16,10 @@
|
||||
|
||||
import pdb
|
||||
import logging
|
||||
from PIL import Image
|
||||
|
||||
from mediagoblin.processing import BadMediaFail
|
||||
from mediagoblin.media_types.audio import audioprocessing
|
||||
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
@ -56,6 +58,73 @@ try:
|
||||
except ImportError:
|
||||
raise Exception('gst/pygst >= 0.10 could not be imported')
|
||||
|
||||
import numpy
|
||||
|
||||
class AudioThumbnailer(object):
    '''
    Creates spectrogram images for audio files and thumbnails of those
    spectrograms.
    '''
    def __init__(self):
        _log.info('Initializing {0}'.format(self.__class__.__name__))

    def spectrogram(self, src, dst, **kw):
        '''
        Render a spectrogram of the audio file ``src`` into ``dst``

        Keyword arguments:
        - width: image width in pixels (required)
        - height: image height in pixels, defaults to 30% of the width
        - fft_size: FFT window size in samples, defaults to 2048
        - progress_callback: optional callable; it receives an integer
          percentage roughly every tenth of the width and 100 at the end
        '''
        width = kw['width']
        height = int(kw.get('height', float(width) * 0.3))
        fft_size = kw.get('fft_size', 2048)
        callback = kw.get('progress_callback')

        processor = audioprocessing.AudioProcessor(
            src,
            fft_size,
            numpy.hanning)

        samples_per_pixel = processor.audio_file.nframes / float(width)

        spectrogram = audioprocessing.SpectrogramImage(width, height, fft_size)

        # at least 1, so widths below 10 pixels do not divide by zero
        progress_step = max(1, width // 10)

        for x in range(width):
            if callback and x % progress_step == 0:
                callback((x * 100) // width)

            seek_point = int(x * samples_per_pixel)

            # only the spectrum is drawn here, the centroid is not needed
            (_, db_spectrum) = processor.spectral_centroid(
                seek_point)

            spectrogram.draw_spectrum(x, db_spectrum)

        if callback:
            callback(100)

        spectrogram.save(dst)

    def thumbnail_spectrogram(self, src, dst, thumb_size):
        '''
        Takes a spectrogram and creates a thumbnail from it

        A full-height slice with the aspect ratio of ``thumb_size`` is cut
        out of the image ``src`` and scaled down if it is larger than
        ``thumb_size``; the result is saved to ``dst``.

        ``thumb_size`` must be a tuple (width, height), otherwise an
        Exception is raised.
        '''
        if not (isinstance(thumb_size, tuple) and len(thumb_size) == 2):
            raise Exception('size argument should be a tuple(width, height)')

        im = Image.open(src)

        im_w, im_h = [float(i) for i in im.size]
        th_w, th_h = [float(i) for i in thumb_size]

        # the slice is positioned relative to 30% into the spectrogram
        wadsworth_position = im_w * 0.3

        # shift left by half a thumbnail width, but never before the image
        start_x = max((
                wadsworth_position - (th_w / 2.0),
                0.0))

        # full image height, width chosen to match the thumbnail's ratio
        stop_x = start_x + (im_h * (th_w / th_h))

        th = im.crop((
                int(start_x), 0,
                int(stop_x), int(im_h)))

        if th.size[0] > th_w or th.size[1] > th_h:
            th.thumbnail(thumb_size, Image.ANTIALIAS)

        th.save(dst)
|
||||
|
||||
|
||||
class AudioTranscoder(object):
|
||||
def __init__(self):
|
||||
_log.info('Initializing {0}'.format(self.__class__.__name__))
|
||||
@ -103,17 +172,21 @@ class AudioTranscoder(object):
|
||||
|
||||
quality = kw.get('quality', 0.3)
|
||||
|
||||
mux_string = kw.get(
|
||||
'mux_string',
|
||||
'vorbisenc quality={0} ! webmmux'.format(quality))
|
||||
|
||||
# Set up pipeline
|
||||
self.pipeline = gst.parse_launch(
|
||||
'filesrc location="{src}" ! '
|
||||
'decodebin2 ! queue ! audiorate tolerance={tolerance} ! '
|
||||
'audioconvert ! audio/x-raw-float,channels=2 ! '
|
||||
'vorbisenc quality={quality} ! webmmux ! '
|
||||
'{mux_string} ! '
|
||||
'progressreport silent=true ! '
|
||||
'filesink location="{dst}"'.format(
|
||||
src=src,
|
||||
tolerance=80000000,
|
||||
quality=quality,
|
||||
mux_string=mux_string,
|
||||
dst=dst))
|
||||
|
||||
self.bus = self.pipeline.get_bus()
|
||||
@ -141,6 +214,9 @@ class AudioTranscoder(object):
|
||||
self.halt()
|
||||
|
||||
def halt(self):
    '''
    Shut down the pipeline, if one is set, and schedule the main loop to
    quit.
    '''
    pipeline = getattr(self, 'pipeline', False)

    if pipeline:
        # Put the pipeline in the NULL state before dropping the attribute.
        pipeline.set_state(gst.STATE_NULL)
        del self.pipeline

    _log.info('Quitting MainLoop gracefully...')
    gobject.idle_add(self._loop.quit)
|
||||
|
||||
@ -149,8 +225,12 @@ if __name__ == '__main__':
|
||||
logging.basicConfig()
|
||||
_log.setLevel(logging.INFO)
|
||||
|
||||
transcoder = AudioTranscoder()
|
||||
data = transcoder.discover(sys.argv[1])
|
||||
res = transcoder.transcode(*sys.argv[1:3])
|
||||
#transcoder = AudioTranscoder()
|
||||
#data = transcoder.discover(sys.argv[1])
|
||||
#res = transcoder.transcode(*sys.argv[1:3])
|
||||
|
||||
thumbnailer = AudioThumbnailer()
|
||||
|
||||
thumbnailer.spectrogram(*sys.argv[1:], width=640)
|
||||
|
||||
pdb.set_trace()
|
||||
|
@ -42,7 +42,7 @@ def sniff_handler(media_file, **kw):
|
||||
_log.info('Found file extension in supported filetypes')
|
||||
return True
|
||||
else:
|
||||
_log.debug('Media present, extension not found in {1}'.format(
|
||||
_log.debug('Media present, extension not found in {0}'.format(
|
||||
SUPPORTED_FILETYPES))
|
||||
else:
|
||||
_log.warning('Need additional information (keyword argument \'media\')'
|
||||
|
@ -29,6 +29,18 @@ _log = logging.getLogger(__name__)
|
||||
_log.setLevel(logging.DEBUG)
|
||||
|
||||
def sniff_handler(media_file, **kw):
    '''
    Tell whether ``media_file`` is a video.

    Runs VideoTranscoder.discover() on ``media_file.name`` and returns True
    if the discovered data reports ``is_video``. Returns False otherwise,
    including when discovery raises an Exception (the error is logged).
    '''
    transcoder = transcoders.VideoTranscoder()

    try:
        data = transcoder.discover(media_file.name)

        _log.debug('Discovered: {0}'.format(data.__dict__))

        if data.is_video:
            return True
    except Exception as exc:
        # Not a bare except: KeyboardInterrupt and SystemExit must still
        # propagate. Report the path that was actually handed to discover().
        _log.error('Exception caught when trying to discover {0}: {1}'.format(
            getattr(media_file, 'name', media_file),
            exc))

    return False
|
||||
|
||||
def process_video(entry):
|
||||
@ -61,7 +73,8 @@ def process_video(entry):
|
||||
|
||||
with tmp_dst:
|
||||
# Transcode queued file to a VP8/vorbis file that fits in a 640x640 square
|
||||
transcoder = transcoders.VideoTranscoder(queued_filename, tmp_dst.name)
|
||||
transcoder = transcoders.VideoTranscoder()
|
||||
transcoder.transcode(queued_filename, tmp_dst.name)
|
||||
|
||||
# Push transcoded video to public storage
|
||||
_log.debug('Saving medium...')
|
||||
|
@ -25,8 +25,6 @@ import pdb
|
||||
import urllib
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
logging.basicConfig()
|
||||
_log.setLevel(logging.DEBUG)
|
||||
|
||||
CPU_COUNT = 2
|
||||
try:
|
||||
@ -340,10 +338,15 @@ class VideoTranscoder:
|
||||
that it was refined afterwards and therefore is done more
|
||||
correctly.
|
||||
'''
|
||||
def __init__(self, src, dst, **kwargs):
|
||||
def __init__(self):
|
||||
_log.info('Initializing VideoTranscoder...')
|
||||
|
||||
self.loop = gobject.MainLoop()
|
||||
|
||||
def transcode(self, src, dst, **kwargs):
|
||||
'''
|
||||
Transcode a video file into a 'medium'-sized version.
|
||||
'''
|
||||
self.source_path = src
|
||||
self.destination_path = dst
|
||||
|
||||
@ -357,6 +360,30 @@ class VideoTranscoder:
|
||||
self._setup()
|
||||
self._run()
|
||||
|
||||
def discover(self, src):
    '''
    Discover properties about a media file

    Stores ``src`` in self.source_path, sets up the discoverer with
    self.__on_discovered as its 'discovered' callback, starts the
    discovery and runs self.loop.

    Returns self._discovered_data, which __on_discovered assigns when it is
    called with a true ``is_media``.
    '''
    _log.info('Discovering {0}'.format(src))

    self.source_path = src
    # Use __on_discovered rather than the default callback that
    # _setup_discover would connect.
    self._setup_discover(discovered_callback=self.__on_discovered)

    self.discoverer.discover()

    # presumably blocks until the callback stops the main loop -- TODO confirm
    self.loop.run()

    # NOTE(review): _discovered_data is not reset before discovery, so when
    # the callback did not store anything this is either missing or left
    # over from an earlier call -- confirm the intended behaviour.
    return self._discovered_data
|
||||
|
||||
def __on_discovered(self, data, is_media):
    '''
    Callback for the 'discovered' event used by discover().

    Keeps ``data`` in self._discovered_data and stops the main loop when
    ``is_media`` is true. Otherwise calls self.__stop() and raises an
    Exception naming self.source_path.
    '''
    if is_media:
        self._discovered_data = data

        self.__stop_mainloop()
        return

    self.__stop()
    raise Exception('Could not discover {0}'.format(self.source_path))
|
||||
|
||||
def _setup(self):
|
||||
self._setup_discover()
|
||||
self._setup_pipeline()
|
||||
@ -369,12 +396,14 @@ class VideoTranscoder:
|
||||
_log.debug('Initializing MainLoop()')
|
||||
self.loop.run()
|
||||
|
||||
def _setup_discover(self):
|
||||
def _setup_discover(self, **kw):
|
||||
_log.debug('Setting up discoverer')
|
||||
self.discoverer = discoverer.Discoverer(self.source_path)
|
||||
|
||||
# Connect self.__discovered to the 'discovered' event
|
||||
self.discoverer.connect('discovered', self.__discovered)
|
||||
self.discoverer.connect(
|
||||
'discovered',
|
||||
kw.get('discovered_callback', self.__discovered))
|
||||
|
||||
def __discovered(self, data, is_media):
|
||||
'''
|
||||
@ -614,14 +643,15 @@ class VideoTranscoder:
|
||||
|
||||
if __name__ == '__main__':
|
||||
os.nice(19)
|
||||
logging.basicConfig()
|
||||
from optparse import OptionParser
|
||||
|
||||
parser = OptionParser(
|
||||
usage='%prog [-v] -a [ video | thumbnail ] SRC DEST')
|
||||
usage='%prog [-v] -a [ video | thumbnail | discover ] SRC [ DEST ]')
|
||||
|
||||
parser.add_option('-a', '--action',
|
||||
dest='action',
|
||||
help='One of "video" or "thumbnail"')
|
||||
help='One of "video", "discover" or "thumbnail"')
|
||||
|
||||
parser.add_option('-v',
|
||||
dest='verbose',
|
||||
@ -645,13 +675,18 @@ if __name__ == '__main__':
|
||||
|
||||
_log.debug(args)
|
||||
|
||||
if not len(args) == 2:
|
||||
if not len(args) == 2 and not options.action == 'discover':
|
||||
parser.print_help()
|
||||
sys.exit()
|
||||
|
||||
transcoder = VideoTranscoder()
|
||||
|
||||
if options.action == 'thumbnail':
|
||||
VideoThumbnailer(*args)
|
||||
elif options.action == 'video':
|
||||
def cb(data):
|
||||
print('I\'m a callback!')
|
||||
transcoder = VideoTranscoder(*args, progress_callback=cb)
|
||||
transcoder.transcode(*args, progress_callback=cb)
|
||||
elif options.action == 'discover':
|
||||
print transcoder.discover(*args).__dict__
|
||||
|
||||
|
@ -20,7 +20,14 @@
|
||||
|
||||
{% block mediagoblin_media %}
|
||||
<div class="audio-media">
|
||||
<audio controls="controls"
|
||||
{% if 'spectrogram' in media.media_files %}
|
||||
<div class="audio-spectrogram">
|
||||
<img src="{{ request.app.public_store.file_url(
|
||||
media.media_files.spectrogram) }}"
|
||||
alt="Spectrogram" />
|
||||
</div>
|
||||
{% endif %}
|
||||
<audio class="audio-player" controls="controls"
|
||||
preload="metadata">
|
||||
<source src="{{ request.app.public_store.file_url(
|
||||
media.media_files.ogg) }}" type="video/webm; encoding=&quot;vorbis&quot;" />
|
||||
|
@ -231,7 +231,8 @@ class TestSubmission:
|
||||
|
||||
context = template.TEMPLATE_TEST_CONTEXT['mediagoblin/submit/start.html']
|
||||
form = context['submit_form']
|
||||
assert re.match(r'^Could not extract any file extension from ".*?"$', str(form.file.errors[0]))
|
||||
assert 'Sorry, I don\'t support that file type :(' == \
|
||||
str(form.file.errors[0])
|
||||
assert len(form.file.errors) == 1
|
||||
|
||||
# NOTE: The following 2 tests will ultimately fail, but they
|
||||
|
Loading…
x
Reference in New Issue
Block a user