Added initial processor for ascii media
This commit is contained in:
parent
57d1cb3cef
commit
35d6a95008
@ -13,6 +13,7 @@
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
import argparse
|
||||
import chardet
|
||||
import os
|
||||
try:
|
||||
@ -22,7 +23,11 @@ except ImportError:
|
||||
import logging
|
||||
|
||||
from mediagoblin import mg_globals as mgg
|
||||
from mediagoblin.processing import create_pub_filepath
|
||||
from mediagoblin.processing import (
|
||||
create_pub_filepath, FilenameBuilder,
|
||||
MediaProcessor, ProcessingManager,
|
||||
get_orig_filename, copy_original,
|
||||
store_public, request_from_args)
|
||||
from mediagoblin.media_types.ascii import asciitoimage
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
@ -43,106 +48,148 @@ def sniff_handler(media_file, **kw):
|
||||
return None
|
||||
|
||||
|
||||
def process_ascii(proc_state):
|
||||
"""Code to process a txt file. Will be run by celery.
|
||||
|
||||
A Workbench() represents a local temporary dir. It is automatically
|
||||
cleaned up when this function exits.
|
||||
class CommonAsciiProcessor(MediaProcessor):
|
||||
"""
|
||||
entry = proc_state.entry
|
||||
workbench = proc_state.workbench
|
||||
ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']
|
||||
# Conversions subdirectory to avoid collisions
|
||||
conversions_subdir = os.path.join(
|
||||
workbench.dir, 'conversions')
|
||||
os.mkdir(conversions_subdir)
|
||||
Provides a base for various ascii processing steps
|
||||
"""
|
||||
def common_setup(self):
    """
    Prepare the per-run state shared by the ascii processing steps.

    Sets ``self.ascii_config`` from the global config, creates a scratch
    subdirectory inside the workbench, resolves the local original file
    name, builds a ``FilenameBuilder`` for it and resets ``self.charset``.
    """
    config_section = 'media_type:mediagoblin.media_types.ascii'
    self.ascii_config = mgg.global_config[config_section]

    # Conversions subdirectory to avoid collisions
    # NOTE(review): 'convirsions' looks like a typo for 'conversions';
    # kept unchanged here because it is the runtime directory name.
    scratch_dir = os.path.join(self.workbench.dir, 'convirsions')
    self.conversions_subdir = scratch_dir
    os.mkdir(scratch_dir)

    # Pull down and set up the original file
    local_original = get_orig_filename(self.entry, self.workbench)
    self.orig_filename = local_original
    self.name_builder = FilenameBuilder(local_original)

    # Filled in later by _detect_charset()
    self.charset = None
|
||||
|
||||
def copy_original(self):
    """
    Hand the original file to the module-level ``copy_original`` helper.

    The target name is built from the original's basename and extension.
    """
    target_name = self.name_builder.fill('{basename}{ext}')
    copy_original(self.entry, self.orig_filename, target_name)
|
||||
|
||||
def _detect_charset(self, orig_file):
    """
    Guess the charset of ``orig_file`` and store it in ``self.charset``.

    The whole file is read and passed to ``chardet.detect``.  The detected
    encoding is only trusted when chardet reports a confidence of at least
    0.9; otherwise 'utf-8' is assumed.

    ``orig_file`` must be a seekable binary file object.  It is rewound to
    the start before returning so that callers can read its contents again.
    """
    d_charset = chardet.detect(orig_file.read())

    # Only select a non-utf-8 charset if chardet is *really* sure
    # Tested with "Feli\x0109an superjaron", which was detected
    if d_charset['confidence'] < 0.9:
        self.charset = 'utf-8'
    else:
        self.charset = d_charset['encoding']

    _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
        d_charset,
        self.charset))

    # Rewind: the read() above consumed the file, and the caller reads it
    # again to convert its contents.
    orig_file.seek(0)
|
||||
|
||||
queued_file.seek(0) # Rewind the queued file
|
||||
def store_unicode_file(self):
    """
    Write an ASCII-only ("portable") copy of the original to public storage.

    The original is decoded using the charset chosen by
    ``_detect_charset`` and re-encoded as ASCII, with every non-ASCII
    character replaced by a numeric character reference (``&#NNNN;``).
    The resulting public file path is recorded under
    ``self.entry.media_files['unicode']``.
    """
    with open(self.orig_filename, 'rb') as orig_file:
        self._detect_charset(orig_file)

        # Charset detection reads the file; make sure we convert the
        # whole content and not just whatever is left after that read.
        orig_file.seek(0)

        unicode_filepath = create_pub_filepath(self.entry,
                                               'ascii-portable.txt')

        with mgg.public_store.get_file(unicode_filepath, 'wb') \
                as unicode_file:
            # Decode the original file from its detected charset (or UTF8)
            # Encode the unicode instance to ASCII and replace any
            # non-ASCII with an HTML entity (&#NNNN;)
            decoded = orig_file.read().decode(self.charset)
            unicode_file.write(
                decoded.encode('ascii', 'xmlcharrefreplace'))

    self.entry.media_files['unicode'] = unicode_filepath
|
||||
|
||||
ascii_converter_args = {}
|
||||
def generate_thumb(self, font=None, thumb_size=None):
    """
    Render the original text to an image, shrink it and publish it.

    :param font: font passed to ``asciitoimage.AsciiToImage``.  When
        falsy, the ``thumbnail_font`` entry of the ascii config is used;
        when that is unset too, the converter's own default applies.
    :param thumb_size: ``(max_width, max_height)`` for the thumbnail.
        When falsy, the values of the ``media:thumb`` global config
        section are used.

    The thumbnail is written as a PNG into the conversions subdirectory
    and then handed to ``store_public`` under the 'thumb' key.
    """
    # If no font kwarg, check config
    if not font:
        font = self.ascii_config.get('thumbnail_font', None)
    if not thumb_size:
        thumb_config = mgg.global_config['media:thumb']
        thumb_size = (thumb_config['max_width'],
                      thumb_config['max_height'])

    # The same name is used locally and in public storage so that the
    # published extension matches the actual image format (PNG).
    thumb_name = self.name_builder.fill('{basename}.thumbnail.png')
    tmp_thumb = os.path.join(self.conversions_subdir, thumb_name)

    ascii_converter_args = {}

    # If there is a font from either the config or kwarg, pass it on.
    # Use the resolved value, not the config entry, so an explicit
    # kwarg is honoured even when the config has no font set.
    if font:
        ascii_converter_args['font'] = font

    converter = asciitoimage.AsciiToImage(**ascii_converter_args)

    with open(self.orig_filename, 'rb') as orig_file:
        thumb = converter._create_image(orig_file.read())

    # Binary mode: the thumbnail is image data, not text.
    with open(tmp_thumb, 'wb') as thumb_file:
        thumb.thumbnail(thumb_size, Image.ANTIALIAS)
        thumb.save(thumb_file)

    _log.debug('Copying local file to public storage')
    store_public(self.entry, 'thumb', tmp_thumb, thumb_name)
|
||||
|
||||
with mgg.public_store.get_file(original_filepath, 'wb') \
|
||||
as original_file:
|
||||
original_file.write(queued_file.read())
|
||||
|
||||
queued_file.seek(0) # Rewind *again*
|
||||
class InitialProcessor(CommonAsciiProcessor):
|
||||
"""
|
||||
Initial processing step for new ascii media
|
||||
"""
|
||||
name = "initial"
|
||||
description = "Initial processing"
|
||||
|
||||
unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')
|
||||
@classmethod
def media_is_eligible(cls, entry=None, state=None):
    """
    Tell whether media in the given state may run this processor.

    ``state`` takes precedence; when it is falsy, ``entry.state`` is
    used instead.  Only "unprocessed" and "failed" are eligible.
    """
    effective_state = state if state else entry.state
    eligible_states = ("unprocessed", "failed")
    return effective_state in eligible_states
|
||||
|
||||
with mgg.public_store.get_file(unicode_filepath, 'wb') \
|
||||
as unicode_file:
|
||||
# Decode the original file from its detected charset (or UTF8)
|
||||
# Encode the unicode instance to ASCII and replace any non-ASCII
|
||||
# with an HTML entity (&#
|
||||
unicode_file.write(
|
||||
unicode(queued_file.read().decode(
|
||||
interpreted_charset)).encode(
|
||||
'ascii',
|
||||
'xmlcharrefreplace'))
|
||||
@classmethod
def generate_parser(cls):
    """
    Build the command line parser for this processor.

    The parser uses the processor's ``name`` as program name and its
    ``description`` as description, and accepts:

    * ``--thumb_size MAX_WIDTH MAX_HEIGHT`` -- two integers
    * ``--font`` -- the thumbnail font

    :returns: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(
        description=cls.description,
        prog=cls.name)

    parser.add_argument(
        '--thumb_size',
        nargs=2,
        # Second value is the height; both were labelled max_width before.
        metavar=('max_width', 'max_height'),
        type=int)

    parser.add_argument(
        '--font',
        help='the thumbnail font')

    return parser
|
||||
|
||||
@classmethod
def args_to_request(cls, args):
    """
    Turn parsed command line arguments into a processing request.

    Only the 'thumb_size' and 'font' arguments are forwarded to
    ``request_from_args``.
    """
    forwarded_keys = ['thumb_size', 'font']
    return request_from_args(args, forwarded_keys)
|
||||
|
||||
def process(self, thumb_size=None, font=None):
    """
    Run the full initial processing pipeline for one entry.

    Steps, in order: common setup, store the portable text file, generate
    the thumbnail (with the optional ``thumb_size`` / ``font`` overrides),
    copy the original, and delete the queued file.
    """
    # Setup must come first: later steps read the state it creates.
    self.common_setup()

    # Derived files.
    self.store_unicode_file()
    self.generate_thumb(font=font, thumb_size=thumb_size)

    # Keep the original, then drop the queued upload.
    self.copy_original()
    self.delete_queue_file()
|
||||
|
||||
|
||||
class AsciiProcessingManager(ProcessingManager):
    """
    Processing manager for ascii media.

    Registers ``InitialProcessor`` as its only processor.
    """
    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) resolves
        # to the subclass when this class is inherited from, which makes
        # __init__ call itself forever.
        super(AsciiProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
|
||||
|
Loading…
x
Reference in New Issue
Block a user