ASCII art support - Fixes

- Improved(?) character set detection: chardet's guess will not
  override the UTF-8 default unless its confidence is >= 90%.
- Renamed unicode.txt to ascii-portable.txt, since the file
  contains only ASCII (non-ASCII characters are written as
  character references), not Unicode.
- etc.
This commit is contained in:
Joar Wandborg 2012-02-02 21:28:21 +01:00
parent 3f1dc64ed1
commit 010d28b4f0
2 changed files with 25 additions and 23 deletions

View File

@ -65,7 +65,8 @@ class AsciiToImage(object):
self._if = ImageFont.truetype(
self._font,
self._font_size)
self._font_size,
encoding='unic')
# ,-,-^-'-^'^-^'^-'^-.
# ( I am a wall socket )Oo, ___
@ -91,6 +92,9 @@ class AsciiToImage(object):
- Character set detection and decoding,
http://pypi.python.org/pypi/chardet
'''
# Convert the input from str to unicode
text = text.decode('utf-8')
# TODO: Account for alternative line endings
lines = text.split('\n')
@ -123,7 +127,7 @@ class AsciiToImage(object):
px_pos = self._px_pos(char_pos)
_log.debug('Writing character "{0}" at {1} (px pos {2}'.format(
_log.debug('Writing character "{0}" at {1} (px pos {2})'.format(
char,
char_pos,
px_pos))
@ -152,21 +156,3 @@ class AsciiToImage(object):
px_pos[index] = char_pos[index] * self._if_dims[index]
return px_pos
if __name__ == "__main__":
import urllib
txt = urllib.urlopen('file:///home/joar/Dropbox/ascii/install-all-the-dependencies.txt')
_log.setLevel(logging.DEBUG)
logging.basicConfig()
converter = AsciiToImage()
converter.convert(txt.read(), '/tmp/test.png')
'''
im, x, y, duration = renderImage(h, 10)
print "Rendered image in %.5f seconds" % duration
im.save('tldr.png', "PNG")
'''

View File

@ -17,10 +17,12 @@ import asciitoimage
import chardet
import os
import Image
import logging
from mediagoblin import mg_globals as mgg
from mediagoblin.processing import create_pub_filepath, THUMB_SIZE
_log = logging.getLogger(__name__)
def process_ascii(entry):
'''
@ -42,6 +44,17 @@ def process_ascii(entry):
with queued_file:
queued_file_charset = chardet.detect(queued_file.read())
# Only select a non-utf-8 charset if chardet is *really* sure
# Tested with "Feli\x0109an superjaron", which was detected
if queued_file_charset['confidence'] < 0.9:
interpreted_charset = 'utf-8'
else:
interpreted_charset = queued_file_charset['encoding']
_log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
queued_file_charset,
interpreted_charset))
queued_file.seek(0) # Rewind the queued file
thumb_filepath = create_pub_filepath(
@ -73,13 +86,16 @@ def process_ascii(entry):
queued_file.seek(0) # Rewind *again*
unicode_filepath = create_pub_filepath(entry, 'unicode.txt')
unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')
with mgg.public_store.get_file(unicode_filepath, 'wb') \
as unicode_file:
# Decode the original file from its detected charset (or UTF8)
# Encode the unicode instance to ASCII and replace any non-ASCII
# with an HTML numeric character reference (&#NNNN;)
unicode_file.write(
unicode(queued_file.read().decode(
queued_file_charset['encoding'])).encode(
unicode(queued_file.read().decode(
interpreted_charset)).encode(
'ascii',
'xmlcharrefreplace'))