added new slugify with unidecode

2014-03-11 15:50:01 +05:00 · 2014-03-11 15:50:01 +05:00 · 2636dddfa6
commit 2636dddfa6
parent 1e80c976e2
3 changed files with 9 additions and 16 deletions
--- a/mediagoblin/tests/test_util.py
+++ b/mediagoblin/tests/test_util.py
@ -77,6 +77,12 @@ def test_slugify():
    assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park'
    assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc'
    assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef'
+    # Russian
+    assert url.slugify(u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430 '
+            u'\u0432 \u043f\u0430\u0440\u043a\u0435') == u'progulka-v-parke'
+    # Korean
+    assert (url.slugify(u'\uacf5\uc6d0\uc5d0\uc11c \uc0b0\ucc45') ==
+            u'gongweoneseo-sancaeg')
 def test_locale_to_lower_upper():
    """
--- a/mediagoblin/tools/url.py
+++ b/mediagoblin/tools/url.py
@ -15,15 +15,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import re
-# This import *is* used; see word.encode('tranlit/long') below.
+from unidecode import unidecode
-from unicodedata import normalize
-try:
-    import translitcodec
-    USING_TRANSLITCODEC = True
-except ImportError:
-    USING_TRANSLITCODEC = False
 _punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
@ -34,11 +26,5 @@ def slugify(text, delim=u'-'):
    """
    result = []
    for word in _punct_re.split(text.lower()):
-        if USING_TRANSLITCODEC:
+        result.extend(unidecode(word).split())
-            word = word.encode('translit/long')
-        else:
-            word = normalize('NFKD', word).encode('ascii', 'ignore')
-        if word:
-            result.append(word)
    return unicode(delim.join(result))
--- a/setup.py
+++ b/setup.py
@ -65,6 +65,7 @@ try:
        'pytz',
        'six',
        'oauthlib==0.5.0',
+        'unidecode',
        ## Annoying.  Please remove once we can!  We only indirectly
        ## use pbr, and currently it breaks things, presumably till