Christopher Allan Webber e4bdc9091c More steps towards a working reprocessing system.
Fleshing out the base classes and setting up some docstrings.  Not
everything is totally clear yet, but I think it's on a good track, and
getting clearer.

This commit sponsored by Ben Finney, on behalf of Free Software Melbourne.
Thank you all!
2013-08-16 15:30:15 -07:00

319 lines
11 KiB
Python

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import logging
import os
from mediagoblin.db.util import atomic_update
from mediagoblin import mg_globals as mgg
from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
_log = logging.getLogger(__name__)
class ProgressCallback(object):
    """Callable that records transcoding progress on a media entry."""

    def __init__(self, entry):
        """Remember the entry whose progress will be updated."""
        self.entry = entry

    def __call__(self, progress):
        """Store ``progress`` on the entry and save it.

        Falsy values (``None``, ``0``, ...) are ignored: nothing is
        written and the entry is not saved.
        """
        if not progress:
            return

        target = self.entry
        target.transcoding_progress = progress
        target.save()
def create_pub_filepath(entry, filename):
    """Ask the public store for a unique filepath for one of entry's files.

    The path components handed to the store are
    ``['media_entries', <entry id as unicode>, filename]``; whatever
    ``mgg.public_store.get_unique_filepath`` returns is passed back as is.
    """
    make_unique = mgg.public_store.get_unique_filepath
    path_components = ['media_entries', unicode(entry.id), filename]
    return make_unique(path_components)
class FilenameBuilder(object):
    """Easily slice and dice filenames.

    Initialize this class with an original file path, then use the fill()
    method to create new filenames based on the original.
    """
    MAX_FILENAME_LENGTH = 255  # VFAT's maximum filename length

    def __init__(self, path):
        """Initialize a builder from an original file path.

        Sets ``dirpath`` (directory part), ``basename`` (file name without
        its extension) and ``ext`` (extension including the dot, lowercased).
        """
        self.dirpath, self.basename = os.path.split(path)
        self.basename, self.ext = os.path.splitext(self.basename)
        self.ext = self.ext.lower()

    def fill(self, fmtstr):
        """Build a new filename based on the original.

        The fmtstr argument can include the following:
        {basename} -- the original basename, with the extension removed
        {ext} -- the original extension, always lowercase

        If necessary, {basename} will be truncated so the filename does not
        exceed this class' MAX_FILENAME_LENGTH in length.  If the rest of
        the format already uses up the whole budget, {basename} is dropped
        entirely.
        """
        # Length of everything in the result that is not the basename.
        fixed_len = len(fmtstr.format(basename='', ext=self.ext))
        # Clamp at zero: a negative slice bound would cut characters off the
        # *end* of the basename instead of limiting its length.
        basename_len = max(0, self.MAX_FILENAME_LENGTH - fixed_len)
        return fmtstr.format(basename=self.basename[:basename_len],
                             ext=self.ext)
class MediaProcessor(object):
    """A particular processor for this media type.

    While the ProcessingManager handles all types of MediaProcessing
    possible for a particular media type, a MediaProcessor can be
    thought of as a *particular* processing action for a media type.
    For example, you may have separate MediaProcessors for:

     - initial_processing: the initial processing of a media
     - gen_thumb: generate a thumbnail
     - resize: resize an image
     - transcode: transcode a video

    ... etc.

    Some information on producing a new MediaProcessor for your media type:

     - You *must* supply a name attribute.  This must be a class level
       attribute, and a string.  This will be used to determine the
       subcommand of your process
     - It's recommended that you supply a class level description
       attribute.
     - Supply a media_is_eligible classmethod.  This will be used to
       determine whether or not a media entry is eligible to use this
       processor type.  See the method documentation for details.
     - To give "./bin/gmg reprocess run" abilities to this media type,
       supply both generate_parser and parser_to_request classmethods.
     - The process method will be what actually processes your media.
    """
    # You MUST override this in the child MediaProcessor!
    name = None

    # Optional, but will be used in various places to describe the
    # action this MediaProcessor provides
    description = None

    def __init__(self, manager):
        """Keep a reference to the manager this processor was created with."""
        self.manager = manager

        # Should be initialized at time of processing, at least
        self.workbench = None

    # @with_workbench
    def process(self, **kwargs):
        """
        Actually process this media entry.

        Subclasses must override this; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def media_is_eligible(cls, media_entry):
        """
        Tell whether media_entry may be handled by this processor type.

        Subclasses must override this; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def media_is_eligibile(cls, media_entry):
        """
        Misspelled former name of media_is_eligible.

        Kept so existing callers keep working; it simply delegates to
        media_is_eligible (including any subclass override of it).
        """
        return cls.media_is_eligible(media_entry)

    ###############################
    # Command line interface things
    ###############################

    @classmethod
    def generate_parser(cls):
        """
        Command line hook; subclasses must override.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def parser_to_request(cls, parser):
        """
        Command line hook; subclasses must override.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError

    ##########################################
    # THE FUTURE: web interface things here :)
    ##########################################
class ProcessingManager(object):
    """Manages all the processing actions available for a media type

    Specific processing actions, MediaProcessor subclasses, are added
    to the ProcessingManager.
    """
    def __init__(self):
        # Dict of all MediaProcessors of this media type, keyed by the
        # processor's .name attribute
        self.processors = {}

    def add_processor(self, processor):
        """
        Add a processor class to this media type

        The processor is registered under its .name attribute; a processor
        registered later under the same name replaces the earlier one.

        Raises AttributeError if processor.name is None.
        """
        name = processor.name
        if name is None:
            raise AttributeError("Processor class's .name attribute not set")

        self.processors[name] = processor

    def list_eligible_processors(self, entry):
        """
        List all processors that this media entry is eligible to be processed
        for.

        Returns the registered processors (not their names) whose
        media_is_eligible(entry) returns a true value.
        """
        # Iterate the registered processors themselves: the dict keys are
        # only their names (strings) and have no media_is_eligible method.
        return [
            processor
            for processor in self.processors.values()
            if processor.media_is_eligible(entry)]

    def gen_process_request_via_cli(self, subparser):
        """Placeholder: not implemented yet, does nothing."""
        # Got to figure out what actually goes here before I can write this
        # properly
        pass

    def process(self, entry, directive, request):
        """
        Process a media entry.

        Placeholder: not implemented yet, does nothing.
        """
        pass
class ProcessingState(object):
    """
    The first and only argument to the "processor" of a media type

    This could be thought of as a "request" to the processor
    function.  It has the main info for the request (media entry)
    and a bunch of tools for the request on it.
    It can get more fancy without impacting old media types.
    """
    def __init__(self, entry):
        """Start with the media entry only; no workbench, nothing cached."""
        self.entry = entry
        self.workbench = None
        # Cache for get_orig_filename()
        self.orig_filename = None

    def set_workbench(self, wb):
        """Set the workbench used by get_orig_filename()."""
        self.workbench = wb

    def get_orig_filename(self):
        """
        Get a filename for the original, on local storage

        If the media entry has a queued_media_file, use that, otherwise
        use the original.

        In the future, this will return the highest quality file available
        if neither the original nor the queued file are available.

        The result is cached on the instance, so the workbench is only
        asked once.  A workbench must have been set (set_workbench) before
        the first uncached call.
        """
        if self.orig_filename is not None:
            return self.orig_filename

        if self.entry.queued_media_file:
            orig_filepath = self.entry.queued_media_file
            storage = mgg.queue_store
        else:
            orig_filepath = self.entry.media_files['original']
            storage = mgg.public_store

        orig_filename = self.workbench.localized_file(
            storage, orig_filepath,
            'source')
        self.orig_filename = orig_filename
        return orig_filename

    def copy_original(self, target_name, keyname=u"original"):
        """Store the original file publicly as target_name under keyname."""
        self.store_public(keyname, self.get_orig_filename(), target_name)

    def store_public(self, keyname, local_file, target_name=None):
        """
        Copy local_file into public storage and record it on the entry.

        - keyname: key in entry.media_files under which the new storage
          path is recorded.  An existing value is replaced (with a logged
          warning).
        - local_file: path of the local file to copy.
        - target_name: file name to use in storage; defaults to the
          basename of local_file.
        """
        if target_name is None:
            target_name = os.path.basename(local_file)
        target_filepath = create_pub_filepath(self.entry, target_name)

        if keyname in self.entry.media_files:
            _log.warning("store_public: keyname %r already used for file %r, "
                         "replacing with %r", keyname,
                         self.entry.media_files[keyname], target_filepath)

        mgg.public_store.copy_local_to_storage(local_file, target_filepath)
        self.entry.media_files[keyname] = target_filepath

    def delete_queue_file(self):
        """
        Remove the queued media file from storage and from the entry.

        Does nothing when the entry has no queued media file.
        """
        # Remove queued media file from storage and database.
        # queued_filepath is in the task_id directory which should
        # be removed too, but fail if the directory is not empty to be on
        # the super-safe side.
        queued_filepath = self.entry.queued_media_file
        if not queued_filepath:
            # Nothing queued: don't hand an empty path to the storage.
            return

        mgg.queue_store.delete_file(queued_filepath)      # rm file
        mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
        self.entry.queued_media_file = []
def mark_entry_failed(entry_id, exc):
    """
    Flag a media entry as failed, recording what is known about the cause.

    When exc is a BaseProcessingFail, its exception_path and metadata are
    stored alongside the failed state so users can be told why processing
    failed.  For any other exception a warning is logged, and fail_error
    and fail_metadata are reset to None and an empty dict.

    Args:
     - entry_id: The id of the media entry
     - exc: The exception that was raised during processing
    """
    if isinstance(exc, BaseProcessingFail):
        # A failure type we know how to handle: keep its identity and
        # whatever metadata was attached to it.
        failure_fields = {
            u'state': u'failed',
            u'fail_error': unicode(exc.exception_path),
            u'fail_metadata': exc.metadata}
    else:
        _log.warn("No idea what happened here, but it failed: %r", exc)
        # Unknown failure: only mark the state, and overwrite any earlier
        # error information that might still be stored on the entry.
        failure_fields = {
            u'state': u'failed',
            u'fail_error': None,
            u'fail_metadata': {}}

    atomic_update(mgg.database.MediaEntry,
                  {'id': entry_id},
                  failure_fields)
class BaseProcessingFail(Exception):
    """
    Root of the processing failure exception hierarchy.

    Don't raise this class directly; subclass it and provide a
    general_message suited to the specific error.  Keyword arguments
    given at construction are kept as the failure's metadata.
    """
    general_message = u''

    def __init__(self, **metadata):
        # Always end up with a dict, also when no keywords were given.
        self.metadata = metadata if metadata else {}

    @property
    def exception_path(self):
        """Return "<module>:<class name>" for the concrete exception class."""
        klass = self.__class__
        module_name = klass.__module__
        class_name = klass.__name__
        return u"%s:%s" % (module_name, class_name)
class BadMediaFail(BaseProcessingFail):
    """
    Processing failure for a file that does not suit its media type.

    Raise this when the file that was handed in is inappropriate for
    the media type it was submitted as.
    """
    general_message = _(u'Invalid file given for media type.')