Added initial processor for ascii media

This commit is contained in:
Rodney Ewing 2013-08-13 17:32:59 -07:00
parent 57d1cb3cef
commit 35d6a95008

View File

@ -13,6 +13,7 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import chardet
import os
try:
@ -22,7 +23,11 @@ except ImportError:
import logging
from mediagoblin import mg_globals as mgg
from mediagoblin.processing import create_pub_filepath
from mediagoblin.processing import (
create_pub_filepath, FilenameBuilder,
MediaProcessor, ProcessingManager,
get_orig_filename, copy_original,
store_public, request_from_args)
from mediagoblin.media_types.ascii import asciitoimage
_log = logging.getLogger(__name__)
@ -43,106 +48,148 @@ def sniff_handler(media_file, **kw):
return None
def process_ascii(proc_state):
"""Code to process a txt file. Will be run by celery.
A Workbench() represents a local temporary dir. It is automatically
cleaned up when this function exits.
class CommonAsciiProcessor(MediaProcessor):
"""
entry = proc_state.entry
workbench = proc_state.workbench
ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']
# Conversions subdirectory to avoid collisions
conversions_subdir = os.path.join(
workbench.dir, 'conversions')
os.mkdir(conversions_subdir)
Provides a base for various ascii processing steps
"""
def common_setup(self):
self.ascii_config = mgg.global_config[
'media_type:mediagoblin.media_types.ascii']
queued_filepath = entry.queued_media_file
queued_filename = workbench.localized_file(
mgg.queue_store, queued_filepath,
'source')
# Conversions subdirectory to avoid collisions; the directory name now
# matches the attribute name (it was misspelled 'convirsions').
self.conversions_subdir = os.path.join(
    self.workbench.dir, 'conversions')
os.mkdir(self.conversions_subdir)
queued_file = file(queued_filename, 'rb')
# Pull down and set up the original file
self.orig_filename = get_orig_filename(
self.entry, self.workbench)
self.name_builder = FilenameBuilder(self.orig_filename)
with queued_file:
queued_file_charset = chardet.detect(queued_file.read())
self.charset = None
def copy_original(self):
    """Pass the original file to the module-level ``copy_original`` helper.

    The target name is built from the original file's base name and
    extension via ``self.name_builder``.
    """
    target_name = self.name_builder.fill('{basename}{ext}')
    copy_original(self.entry, self.orig_filename, target_name)
def _detect_charset(self, orig_file):
d_charset = chardet.detect(orig_file.read())
# Only select a non-utf-8 charset if chardet is *really* sure
# Tested with "Feli\x0109an superjaron", which was detected
if queued_file_charset['confidence'] < 0.9:
interpreted_charset = 'utf-8'
# Tested with "Feli\x0109an superjaron", which was detected
if d_charset['confidence'] < 0.9:
self.charset = 'utf-8'
else:
interpreted_charset = queued_file_charset['encoding']
self.charset = d_charset['encoding']
_log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
queued_file_charset,
interpreted_charset))
d_charset,
self.charset))
queued_file.seek(0) # Rewind the queued file
def store_unicode_file(self):
    with file(self.orig_filename, 'rb') as orig_file:
        self._detect_charset(orig_file)
        # _detect_charset() consumes the whole file while sniffing the
        # encoding; rewind so the read() below sees the content again
        # instead of an empty string.
        orig_file.seek(0)
unicode_filepath = create_pub_filepath(self.entry,
'ascii-portable.txt')
thumb_filepath = create_pub_filepath(
entry, 'thumbnail.png')
with mgg.public_store.get_file(unicode_filepath, 'wb') \
as unicode_file:
# Decode the original file from its detected charset (or UTF8)
# Encode the unicode instance to ASCII and replace any
# non-ASCII with an HTML numeric character reference (e.g. &#233;)
unicode_file.write(
unicode(orig_file.read().decode(
self.charset)).encode(
'ascii',
'xmlcharrefreplace'))
tmp_thumb_filename = os.path.join(
conversions_subdir, thumb_filepath[-1])
self.entry.media_files['unicode'] = unicode_filepath
ascii_converter_args = {}
def generate_thumb(self, font=None, thumb_size=None):
with file(self.orig_filename, 'rb') as orig_file:
# If no font kwarg, check config
if not font:
font = self.ascii_config.get('thumbnail_font', None)
if not thumb_size:
thumb_size = (mgg.global_config['media:thumb']['max_width'],
mgg.global_config['media:thumb']['max_height'])
if ascii_config['thumbnail_font']:
ascii_converter_args.update(
{'font': ascii_config['thumbnail_font']})
tmp_thumb = os.path.join(
self.conversions_subdir,
self.name_builder.fill('{basename}.thumbnail.png'))
converter = asciitoimage.AsciiToImage(
**ascii_converter_args)
ascii_converter_args = {}
thumb = converter._create_image(
queued_file.read())
# If there is a font from either the config or the kwarg, pass that
# resolved value on to the converter. Re-reading the config here would
# silently discard an explicit ``font`` argument.
if font:
    ascii_converter_args.update({'font': font})
with file(tmp_thumb_filename, 'w') as thumb_file:
thumb.thumbnail(
(mgg.global_config['media:thumb']['max_width'],
mgg.global_config['media:thumb']['max_height']),
Image.ANTIALIAS)
thumb.save(thumb_file)
converter = asciitoimage.AsciiToImage(
**ascii_converter_args)
_log.debug('Copying local file to public storage')
mgg.public_store.copy_local_to_storage(
tmp_thumb_filename, thumb_filepath)
thumb = converter._create_image(
orig_file.read())
queued_file.seek(0)
with file(tmp_thumb, 'w') as thumb_file:
thumb.thumbnail(
thumb_size,
Image.ANTIALIAS)
thumb.save(thumb_file)
original_filepath = create_pub_filepath(entry, queued_filepath[-1])
_log.debug('Copying local file to public storage')
store_public(self.entry, 'thumb', tmp_thumb,
self.name_builder.fill('{basename}.thumbnail.jpg'))
with mgg.public_store.get_file(original_filepath, 'wb') \
as original_file:
original_file.write(queued_file.read())
queued_file.seek(0) # Rewind *again*
class InitialProcessor(CommonAsciiProcessor):
"""
Initial processing step for new ascii media
"""
name = "initial"
description = "Initial processing"
unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')
@classmethod
def media_is_eligible(cls, entry=None, state=None):
    """Return whether media in the given state may run this processor.

    ``state`` takes precedence; when it is empty or not given, the state
    is read from ``entry.state``. Media is eligible when that state is
    "unprocessed" or "failed".
    """
    current_state = state if state else entry.state
    return current_state in ("unprocessed", "failed")
with mgg.public_store.get_file(unicode_filepath, 'wb') \
as unicode_file:
# Decode the original file from its detected charset (or UTF8)
# Encode the unicode instance to ASCII and replace any non-ASCII
# with an HTML numeric character reference (e.g. &#233;)
unicode_file.write(
unicode(queued_file.read().decode(
interpreted_charset)).encode(
'ascii',
'xmlcharrefreplace'))
@classmethod
def generate_parser(cls):
parser = argparse.ArgumentParser(
description=cls.description,
prog=cls.name)
# Remove queued media file from storage and database.
# queued_filepath is in the task_id directory which should
# be removed too, but fail if the directory is not empty to be on
# the super-safe side.
mgg.queue_store.delete_file(queued_filepath) # rm file
mgg.queue_store.delete_dir(queued_filepath[:-1]) # rm dir
entry.queued_media_file = []
# Two integers: maximum thumbnail width, then maximum thumbnail height.
parser.add_argument(
    '--thumb_size',
    nargs=2,
    metavar=('max_width', 'max_height'),
    type=int)
media_files_dict = entry.setdefault('media_files', {})
media_files_dict['thumb'] = thumb_filepath
media_files_dict['unicode'] = unicode_filepath
media_files_dict['original'] = original_filepath
parser.add_argument(
'--font',
help='the thumbnail font')
entry.save()
return parser
@classmethod
def args_to_request(cls, args):
    """Turn parsed command-line ``args`` into a processing request.

    Delegates to ``request_from_args`` with the option names this
    processor accepts: ``thumb_size`` and ``font``.
    """
    accepted_options = ['thumb_size', 'font']
    return request_from_args(args, accepted_options)
def process(self, thumb_size=None, font=None):
    """Run the initial processing steps for an ascii media entry, in order.

    ``thumb_size`` and ``font`` are passed through to ``generate_thumb``.
    """
    # Set up config, the conversions dir and the local original file.
    self.common_setup()

    # Produce the derived files.
    self.store_unicode_file()
    self.generate_thumb(font=font, thumb_size=thumb_size)

    # Keep the original, then drop the queued upload.
    self.copy_original()
    self.delete_queue_file()
class AsciiProcessingManager(ProcessingManager):
    """Processing manager for ascii media; registers ``InitialProcessor``."""

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) resolves
        # to the subclass when this class is inherited from, which makes
        # __init__ call itself forever.
        super(AsciiProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)