ASCII art support - Fixes

- Improved(?) character set detection: chardet will not win over UTF-8 unless it is >= 90% sure. - Renamed unicode.txt to ascii-portable.txt, since there is no Unicode in the file. - etc.
2012-02-02 21:28:21 +01:00
parent 3f1dc64ed1
commit 010d28b4f0
2 changed files with 25 additions and 23 deletions
--- a/mediagoblin/media_types/ascii/asciitoimage.py
+++ b/mediagoblin/media_types/ascii/asciitoimage.py
@@ -65,7 +65,8 @@ class AsciiToImage(object):

        self._if = ImageFont.truetype(
            self._font,
-            self._font_size)
+            self._font_size,
+            encoding='unic')

        #      ,-,-^-'-^'^-^'^-'^-.
        #     ( I am a wall socket )Oo,  ___
@@ -91,6 +92,9 @@ class AsciiToImage(object):
        - Character set detection and decoding,
          http://pypi.python.org/pypi/chardet
        '''
+        # Convert the input from str to unicode
+        text = text.decode('utf-8')
+
        # TODO: Account for alternative line endings
        lines = text.split('\n')

@@ -123,7 +127,7 @@ class AsciiToImage(object):

                px_pos = self._px_pos(char_pos)

-                _log.debug('Writing character "{0}" at {1} (px pos {2}'.format(
+                _log.debug('Writing character "{0}" at {1} (px pos {2})'.format(
                        char,
                        char_pos,
                        px_pos))
@@ -152,21 +156,3 @@ class AsciiToImage(object):
                px_pos[index] = char_pos[index] * self._if_dims[index]

        return px_pos
-
-
-if __name__ == "__main__":
-    import urllib
-    txt = urllib.urlopen('file:///home/joar/Dropbox/ascii/install-all-the-dependencies.txt')
-
-    _log.setLevel(logging.DEBUG)
-    logging.basicConfig()
-
-    converter = AsciiToImage()
-
-    converter.convert(txt.read(), '/tmp/test.png')
-
-    '''
-    im, x, y, duration = renderImage(h, 10)
-    print "Rendered image in %.5f seconds" % duration
-    im.save('tldr.png', "PNG")
-    '''
--- a/mediagoblin/media_types/ascii/processing.py
+++ b/mediagoblin/media_types/ascii/processing.py
@@ -17,10 +17,12 @@ import asciitoimage
 import chardet
 import os
 import Image
+import logging

 from mediagoblin import mg_globals as mgg
 from mediagoblin.processing import create_pub_filepath, THUMB_SIZE

+_log = logging.getLogger(__name__)

 def process_ascii(entry):
    '''
@@ -42,6 +44,17 @@ def process_ascii(entry):
    with queued_file:
        queued_file_charset = chardet.detect(queued_file.read())

+        # Only select a non-utf-8 charset if chardet is *really* sure
+        # Tested with "Feli\x0109an superjaron", which was detected as non-UTF-8
+        if queued_file_charset['confidence'] < 0.9:
+            interpreted_charset = 'utf-8'
+        else:
+            interpreted_charset = queued_file_charset['encoding']
+
+        _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
+                queued_file_charset,
+                interpreted_charset))
+
        queued_file.seek(0)  # Rewind the queued file

        thumb_filepath = create_pub_filepath(
@@ -73,13 +86,16 @@ def process_ascii(entry):

        queued_file.seek(0)  # Rewind *again*

-        unicode_filepath = create_pub_filepath(entry, 'unicode.txt')
+        unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')

        with mgg.public_store.get_file(unicode_filepath, 'wb') \
                as unicode_file:
+            # Decode the original file from its detected charset (or UTF8)
+            # Encode the unicode instance to ASCII and replace any non-ASCII
+            # character with a numeric character reference (&#NNNN;)
            unicode_file.write(
-                    unicode(queued_file.read().decode(
-                        queued_file_charset['encoding'])).encode(
+                unicode(queued_file.read().decode(
+                        interpreted_charset)).encode(
                    'ascii',
                    'xmlcharrefreplace'))