Added initial processor for ascii media
This commit is contained in:
parent
57d1cb3cef
commit
35d6a95008
@ -13,6 +13,7 @@
|
|||||||
#
|
#
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
import argparse
|
||||||
import chardet
|
import chardet
|
||||||
import os
|
import os
|
||||||
try:
|
try:
|
||||||
@ -22,7 +23,11 @@ except ImportError:
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from mediagoblin import mg_globals as mgg
|
from mediagoblin import mg_globals as mgg
|
||||||
from mediagoblin.processing import create_pub_filepath
|
from mediagoblin.processing import (
|
||||||
|
create_pub_filepath, FilenameBuilder,
|
||||||
|
MediaProcessor, ProcessingManager,
|
||||||
|
get_orig_filename, copy_original,
|
||||||
|
store_public, request_from_args)
|
||||||
from mediagoblin.media_types.ascii import asciitoimage
|
from mediagoblin.media_types.ascii import asciitoimage
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
@ -43,106 +48,148 @@ def sniff_handler(media_file, **kw):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def process_ascii(proc_state):
|
class CommonAsciiProcessor(MediaProcessor):
|
||||||
"""Code to process a txt file. Will be run by celery.
|
|
||||||
|
|
||||||
A Workbench() represents a local tempory dir. It is automatically
|
|
||||||
cleaned up when this function exits.
|
|
||||||
"""
|
"""
|
||||||
entry = proc_state.entry
|
Provides a base for various ascii processing steps
|
||||||
workbench = proc_state.workbench
|
"""
|
||||||
ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']
|
def common_setup(self):
|
||||||
# Conversions subdirectory to avoid collisions
|
self.ascii_config = mgg.global_config[
|
||||||
conversions_subdir = os.path.join(
|
'media_type:mediagoblin.media_types.ascii']
|
||||||
workbench.dir, 'conversions')
|
|
||||||
os.mkdir(conversions_subdir)
|
|
||||||
|
|
||||||
queued_filepath = entry.queued_media_file
|
# Conversions subdirectory to avoid collisions
|
||||||
queued_filename = workbench.localized_file(
|
self.conversions_subdir = os.path.join(
|
||||||
mgg.queue_store, queued_filepath,
|
self.workbench.dir, 'convirsions')
|
||||||
'source')
|
os.mkdir(self.conversions_subdir)
|
||||||
|
|
||||||
queued_file = file(queued_filename, 'rb')
|
# Pull down and set up the original file
|
||||||
|
self.orig_filename = get_orig_filename(
|
||||||
|
self.entry, self.workbench)
|
||||||
|
self.name_builder = FilenameBuilder(self.orig_filename)
|
||||||
|
|
||||||
with queued_file:
|
self.charset = None
|
||||||
queued_file_charset = chardet.detect(queued_file.read())
|
|
||||||
|
def copy_original(self):
    """Hand the locally fetched original over to the shared copy helper.

    The target name is produced by ``self.name_builder`` from the
    ``'{basename}{ext}'`` template.  Relies on ``self.entry``,
    ``self.orig_filename`` and ``self.name_builder`` already being set
    (``common_setup`` assigns the latter two).
    """
    # Inside a method body the bare name ``copy_original`` resolves to the
    # module-level helper imported from mediagoblin.processing, not to this
    # method, so this is a delegation and not a recursive call.
    target_name = self.name_builder.fill('{basename}{ext}')
    copy_original(self.entry, self.orig_filename, target_name)
|
||||||
|
|
||||||
|
def _detect_charset(self, orig_file):
|
||||||
|
d_charset = chardet.detect(orig_file.read())
|
||||||
|
|
||||||
# Only select a non-utf-8 charset if chardet is *really* sure
|
# Only select a non-utf-8 charset if chardet is *really* sure
|
||||||
# Tested with "Feli\x0109an superjaron", which was detecte
|
# Tested with "Feli\x0109an superjaron", which was detected
|
||||||
if queued_file_charset['confidence'] < 0.9:
|
if d_charset['confidence'] < 0.9:
|
||||||
interpreted_charset = 'utf-8'
|
self.charset = 'utf-8'
|
||||||
else:
|
else:
|
||||||
interpreted_charset = queued_file_charset['encoding']
|
self.charset = d_charset['encoding']
|
||||||
|
|
||||||
_log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
|
_log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
|
||||||
queued_file_charset,
|
d_charset,
|
||||||
interpreted_charset))
|
self.charset))
|
||||||
|
|
||||||
queued_file.seek(0) # Rewind the queued file
|
def store_unicode_file(self):
|
||||||
|
with file(self.orig_filename, 'rb') as orig_file:
|
||||||
|
self._detect_charset(orig_file)
|
||||||
|
unicode_filepath = create_pub_filepath(self.entry,
|
||||||
|
'ascii-portable.txt')
|
||||||
|
|
||||||
thumb_filepath = create_pub_filepath(
|
with mgg.public_store.get_file(unicode_filepath, 'wb') \
|
||||||
entry, 'thumbnail.png')
|
as unicode_file:
|
||||||
|
# Decode the original file from its detected charset (or UTF8)
|
||||||
|
# Encode the unicode instance to ASCII and replace any
|
||||||
|
# non-ASCII with an HTML entity (&#
|
||||||
|
unicode_file.write(
|
||||||
|
unicode(orig_file.read().decode(
|
||||||
|
self.charset)).encode(
|
||||||
|
'ascii',
|
||||||
|
'xmlcharrefreplace'))
|
||||||
|
|
||||||
tmp_thumb_filename = os.path.join(
|
self.entry.media_files['unicode'] = unicode_filepath
|
||||||
conversions_subdir, thumb_filepath[-1])
|
|
||||||
|
|
||||||
ascii_converter_args = {}
|
def generate_thumb(self, font=None, thumb_size=None):
|
||||||
|
with file(self.orig_filename, 'rb') as orig_file:
|
||||||
|
# If no font kwarg, check config
|
||||||
|
if not font:
|
||||||
|
font = self.ascii_config.get('thumbnail_font', None)
|
||||||
|
if not thumb_size:
|
||||||
|
thumb_size = (mgg.global_config['media:thumb']['max_width'],
|
||||||
|
mgg.global_config['media:thumb']['max_height'])
|
||||||
|
|
||||||
if ascii_config['thumbnail_font']:
|
tmp_thumb = os.path.join(
|
||||||
ascii_converter_args.update(
|
self.conversions_subdir,
|
||||||
{'font': ascii_config['thumbnail_font']})
|
self.name_builder.fill('{basename}.thumbnail.png'))
|
||||||
|
|
||||||
converter = asciitoimage.AsciiToImage(
|
ascii_converter_args = {}
|
||||||
**ascii_converter_args)
|
|
||||||
|
|
||||||
thumb = converter._create_image(
|
# If there is a font from either the config or kwarg, update
|
||||||
queued_file.read())
|
# ascii_converter_args
|
||||||
|
if font:
|
||||||
|
ascii_converter_args.update(
|
||||||
|
{'font': font})
|
||||||
|
|
||||||
with file(tmp_thumb_filename, 'w') as thumb_file:
|
converter = asciitoimage.AsciiToImage(
|
||||||
thumb.thumbnail(
|
**ascii_converter_args)
|
||||||
(mgg.global_config['media:thumb']['max_width'],
|
|
||||||
mgg.global_config['media:thumb']['max_height']),
|
|
||||||
Image.ANTIALIAS)
|
|
||||||
thumb.save(thumb_file)
|
|
||||||
|
|
||||||
_log.debug('Copying local file to public storage')
|
thumb = converter._create_image(
|
||||||
mgg.public_store.copy_local_to_storage(
|
orig_file.read())
|
||||||
tmp_thumb_filename, thumb_filepath)
|
|
||||||
|
|
||||||
queued_file.seek(0)
|
with file(tmp_thumb, 'w') as thumb_file:
|
||||||
|
thumb.thumbnail(
|
||||||
|
thumb_size,
|
||||||
|
Image.ANTIALIAS)
|
||||||
|
thumb.save(thumb_file)
|
||||||
|
|
||||||
original_filepath = create_pub_filepath(entry, queued_filepath[-1])
|
_log.debug('Copying local file to public storage')
|
||||||
|
store_public(self.entry, 'thumb', tmp_thumb,
|
||||||
|
self.name_builder.fill('{basename}.thumbnail.jpg'))
|
||||||
|
|
||||||
with mgg.public_store.get_file(original_filepath, 'wb') \
|
|
||||||
as original_file:
|
|
||||||
original_file.write(queued_file.read())
|
|
||||||
|
|
||||||
queued_file.seek(0) # Rewind *again*
|
class InitialProcessor(CommonAsciiProcessor):
|
||||||
|
"""
|
||||||
|
Initial processing step for new ascii media
|
||||||
|
"""
|
||||||
|
name = "initial"
|
||||||
|
description = "Initial processing"
|
||||||
|
|
||||||
unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')
|
@classmethod
def media_is_eligible(cls, entry=None, state=None):
    """Tell whether media in a given state may go through this processor.

    :param entry: object whose ``state`` attribute is consulted when no
        (truthy) ``state`` is passed.
    :param state: state string to check; takes precedence over ``entry``.
    :returns: True for the states ``"unprocessed"`` and ``"failed"``,
        False otherwise.
    """
    eligible_states = ("unprocessed", "failed")
    # Fall back to the entry's own state only when none was supplied.
    current_state = state if state else entry.state
    return current_state in eligible_states
|
||||||
|
|
||||||
with mgg.public_store.get_file(unicode_filepath, 'wb') \
|
@classmethod
|
||||||
as unicode_file:
|
def generate_parser(cls):
|
||||||
# Decode the original file from its detected charset (or UTF8)
|
parser = argparse.ArgumentParser(
|
||||||
# Encode the unicode instance to ASCII and replace any non-ASCII
|
description=cls.description,
|
||||||
# with an HTML entity (&#
|
prog=cls.name)
|
||||||
unicode_file.write(
|
|
||||||
unicode(queued_file.read().decode(
|
|
||||||
interpreted_charset)).encode(
|
|
||||||
'ascii',
|
|
||||||
'xmlcharrefreplace'))
|
|
||||||
|
|
||||||
# Remove queued media file from storage and database.
|
parser.add_argument(
|
||||||
# queued_filepath is in the task_id directory which should
|
'--thumb_size',
|
||||||
# be removed too, but fail if the directory is not empty to be on
|
nargs=2,
|
||||||
# the super-safe side.
|
metavar=('max_width', 'max_height'),
|
||||||
mgg.queue_store.delete_file(queued_filepath) # rm file
|
type=int)
|
||||||
mgg.queue_store.delete_dir(queued_filepath[:-1]) # rm dir
|
|
||||||
entry.queued_media_file = []
|
|
||||||
|
|
||||||
media_files_dict = entry.setdefault('media_files', {})
|
parser.add_argument(
|
||||||
media_files_dict['thumb'] = thumb_filepath
|
'--font',
|
||||||
media_files_dict['unicode'] = unicode_filepath
|
help='the thumbnail font')
|
||||||
media_files_dict['original'] = original_filepath
|
|
||||||
|
|
||||||
entry.save()
|
return parser
|
||||||
|
|
||||||
|
@classmethod
def args_to_request(cls, args):
    """Turn parsed command-line arguments into a processing request.

    Delegates to ``request_from_args``, forwarding only the options this
    processor understands: ``thumb_size`` and ``font``.

    :param args: parsed arguments, as produced by the parser from
        ``generate_parser``.
    :returns: whatever ``request_from_args`` returns for those two keys.
    """
    forwarded_options = ['thumb_size', 'font']
    return request_from_args(args, forwarded_options)
|
||||||
|
|
||||||
|
def process(self, thumb_size=None, font=None):
    """Run every step of the initial processing, in a fixed order.

    :param thumb_size: optional thumbnail size, passed through to
        ``generate_thumb``.
    :param font: optional thumbnail font, passed through to
        ``generate_thumb``.
    """
    # Setup has to come first: the later steps read attributes it assigns.
    self.common_setup()

    # Derived files: the portable text version, then the thumbnail.
    self.store_unicode_file()
    self.generate_thumb(font=font, thumb_size=thumb_size)

    # Finally keep the original and drop the queued upload.
    self.copy_original()
    self.delete_queue_file()
|
||||||
|
|
||||||
|
|
||||||
|
class AsciiProcessingManager(ProcessingManager):
    """Processing manager for ascii media.

    On construction it registers ``InitialProcessor`` as an available
    processor.
    """

    def __init__(self):
        # Name the class explicitly: ``super(self.__class__, self)`` would
        # resolve to this very __init__ again for any subclass instance and
        # recurse without end.
        super(AsciiProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user