Added initial processor for ascii media

This commit is contained in:
Rodney Ewing 2013-08-13 17:32:59 -07:00
parent 57d1cb3cef
commit 35d6a95008

View File

@@ -13,6 +13,7 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import chardet import chardet
import os import os
try: try:
@@ -22,7 +23,11 @@ except ImportError:
import logging import logging
from mediagoblin import mg_globals as mgg from mediagoblin import mg_globals as mgg
from mediagoblin.processing import create_pub_filepath from mediagoblin.processing import (
create_pub_filepath, FilenameBuilder,
MediaProcessor, ProcessingManager,
get_orig_filename, copy_original,
store_public, request_from_args)
from mediagoblin.media_types.ascii import asciitoimage from mediagoblin.media_types.ascii import asciitoimage
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@@ -43,106 +48,148 @@ def sniff_handler(media_file, **kw):
return None return None
def process_ascii(proc_state): class CommonAsciiProcessor(MediaProcessor):
"""Code to process a txt file. Will be run by celery.
A Workbench() represents a local temporary dir. It is automatically
cleaned up when this function exits.
""" """
entry = proc_state.entry Provides a base for various ascii processing steps
workbench = proc_state.workbench """
ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii'] def common_setup(self):
# Conversions subdirectory to avoid collisions self.ascii_config = mgg.global_config[
conversions_subdir = os.path.join( 'media_type:mediagoblin.media_types.ascii']
workbench.dir, 'conversions')
os.mkdir(conversions_subdir)
queued_filepath = entry.queued_media_file # Conversions subdirectory to avoid collisions
queued_filename = workbench.localized_file( self.conversions_subdir = os.path.join(
mgg.queue_store, queued_filepath, self.workbench.dir, 'convirsions')
'source') os.mkdir(self.conversions_subdir)
queued_file = file(queued_filename, 'rb') # Pull down and set up the original file
self.orig_filename = get_orig_filename(
self.entry, self.workbench)
self.name_builder = FilenameBuilder(self.orig_filename)
with queued_file: self.charset = None
queued_file_charset = chardet.detect(queued_file.read())
def copy_original(self):
    """
    Copy the original file for this entry.

    Delegates to the module-level ``copy_original`` helper, passing the
    entry, the local original filename and a target name built from the
    '{basename}{ext}' template.
    """
    # Inside the method body the bare name resolves to the imported
    # module-level helper, not to this method.
    target_name = self.name_builder.fill('{basename}{ext}')
    copy_original(self.entry, self.orig_filename, target_name)
def _detect_charset(self, orig_file):
d_charset = chardet.detect(orig_file.read())
# Only select a non-utf-8 charset if chardet is *really* sure # Only select a non-utf-8 charset if chardet is *really* sure
# Tested with "Feli\x0109an superjaron", which was detecte # Tested with "Feli\x0109an superjaron", which was detected
if queued_file_charset['confidence'] < 0.9: if d_charset['confidence'] < 0.9:
interpreted_charset = 'utf-8' self.charset = 'utf-8'
else: else:
interpreted_charset = queued_file_charset['encoding'] self.charset = d_charset['encoding']
_log.info('Charset detected: {0}\nWill interpret as: {1}'.format( _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
queued_file_charset, d_charset,
interpreted_charset)) self.charset))
queued_file.seek(0) # Rewind the queued file def store_unicode_file(self):
with file(self.orig_filename, 'rb') as orig_file:
self._detect_charset(orig_file)
unicode_filepath = create_pub_filepath(self.entry,
'ascii-portable.txt')
thumb_filepath = create_pub_filepath( with mgg.public_store.get_file(unicode_filepath, 'wb') \
entry, 'thumbnail.png') as unicode_file:
# Decode the original file from its detected charset (or UTF8)
# Encode the unicode instance to ASCII and replace any
# non-ASCII with an HTML entity (&#
unicode_file.write(
unicode(orig_file.read().decode(
self.charset)).encode(
'ascii',
'xmlcharrefreplace'))
tmp_thumb_filename = os.path.join( self.entry.media_files['unicode'] = unicode_filepath
conversions_subdir, thumb_filepath[-1])
ascii_converter_args = {} def generate_thumb(self, font=None, thumb_size=None):
with file(self.orig_filename, 'rb') as orig_file:
# If no font kwarg, check config
if not font:
font = self.ascii_config.get('thumbnail_font', None)
if not thumb_size:
thumb_size = (mgg.global_config['media:thumb']['max_width'],
mgg.global_config['media:thumb']['max_height'])
if ascii_config['thumbnail_font']: tmp_thumb = os.path.join(
ascii_converter_args.update( self.conversions_subdir,
{'font': ascii_config['thumbnail_font']}) self.name_builder.fill('{basename}.thumbnail.png'))
converter = asciitoimage.AsciiToImage( ascii_converter_args = {}
**ascii_converter_args)
thumb = converter._create_image( # If there is a font from either the config or kwarg, update
queued_file.read()) # ascii_converter_args
if font:
ascii_converter_args.update(
{'font': self.ascii_config['thumbnail_font']})
with file(tmp_thumb_filename, 'w') as thumb_file: converter = asciitoimage.AsciiToImage(
thumb.thumbnail( **ascii_converter_args)
(mgg.global_config['media:thumb']['max_width'],
mgg.global_config['media:thumb']['max_height']),
Image.ANTIALIAS)
thumb.save(thumb_file)
_log.debug('Copying local file to public storage') thumb = converter._create_image(
mgg.public_store.copy_local_to_storage( orig_file.read())
tmp_thumb_filename, thumb_filepath)
queued_file.seek(0) with file(tmp_thumb, 'w') as thumb_file:
thumb.thumbnail(
thumb_size,
Image.ANTIALIAS)
thumb.save(thumb_file)
original_filepath = create_pub_filepath(entry, queued_filepath[-1]) _log.debug('Copying local file to public storage')
store_public(self.entry, 'thumb', tmp_thumb,
self.name_builder.fill('{basename}.thumbnail.jpg'))
with mgg.public_store.get_file(original_filepath, 'wb') \
as original_file:
original_file.write(queued_file.read())
queued_file.seek(0) # Rewind *again* class InitialProcessor(CommonAsciiProcessor):
"""
Initial processing step for new ascii media
"""
name = "initial"
description = "Initial processing"
unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt') @classmethod
def media_is_eligible(cls, entry=None, state=None):
if not state:
state = entry.state
return state in (
"unprocessed", "failed")
with mgg.public_store.get_file(unicode_filepath, 'wb') \ @classmethod
as unicode_file: def generate_parser(cls):
# Decode the original file from its detected charset (or UTF8) parser = argparse.ArgumentParser(
# Encode the unicode instance to ASCII and replace any non-ASCII description=cls.description,
# with an HTML entity (&# prog=cls.name)
unicode_file.write(
unicode(queued_file.read().decode(
interpreted_charset)).encode(
'ascii',
'xmlcharrefreplace'))
# Remove queued media file from storage and database. parser.add_argument(
# queued_filepath is in the task_id directory which should '--thumb_size',
# be removed too, but fail if the directory is not empty to be on nargs=2,
# the super-safe side. metavar=('max_width', 'max_width'),
mgg.queue_store.delete_file(queued_filepath) # rm file type=int)
mgg.queue_store.delete_dir(queued_filepath[:-1]) # rm dir
entry.queued_media_file = []
media_files_dict = entry.setdefault('media_files', {}) parser.add_argument(
media_files_dict['thumb'] = thumb_filepath '--font',
media_files_dict['unicode'] = unicode_filepath help='the thumbnail font')
media_files_dict['original'] = original_filepath
entry.save() return parser
@classmethod
def args_to_request(cls, args):
    """
    Turn parsed command line args into a processing request.

    Delegates to ``request_from_args`` with the 'thumb_size' and 'font'
    keys, matching the options declared by ``generate_parser``.
    """
    request_keys = ['thumb_size', 'font']
    return request_from_args(args, request_keys)
def process(self, thumb_size=None, font=None):
    """
    Run the initial processing steps, in order.

    ``thumb_size`` and ``font`` are forwarded unchanged to
    ``generate_thumb``; both default to None.
    """
    # Setup must come first: it populates the attributes (workbench
    # subdirectory, original filename, name builder) the later steps use.
    self.common_setup()

    # Produce the derived files.
    self.store_unicode_file()
    self.generate_thumb(font=font, thumb_size=thumb_size)

    # Keep a copy of the original, then clean up the queued file
    # (delete_queue_file is defined outside this view).
    self.copy_original()
    self.delete_queue_file()
class AsciiProcessingManager(ProcessingManager):
    """
    Processing manager for ascii media.

    Registers ``InitialProcessor`` on construction.
    """
    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) would
        # recurse forever as soon as this class is subclassed, because
        # self.__class__ is then the subclass and super() resolves back
        # to this very __init__.
        super(AsciiProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)