added new slugify with unidecode

This commit is contained in:
Boris Bobrov 2014-03-11 15:50:01 +05:00 committed by Christopher Allan Webber
parent 1e80c976e2
commit 2636dddfa6
3 changed files with 9 additions and 16 deletions

View File

@@ -77,6 +77,12 @@ def test_slugify():
assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park' assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park'
assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc' assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc'
assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef' assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef'
# Russian
assert url.slugify(u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430 '
u'\u0432 \u043f\u0430\u0440\u043a\u0435') == u'progulka-v-parke'
# Korean
assert (url.slugify(u'\uacf5\uc6d0\uc5d0\uc11c \uc0b0\ucc45') ==
u'gongweoneseo-sancaeg')
def test_locale_to_lower_upper(): def test_locale_to_lower_upper():
""" """

View File

@@ -15,15 +15,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import re import re
# This import *is* used; see word.encode('translit/long') below. from unidecode import unidecode
from unicodedata import normalize
try:
import translitcodec
USING_TRANSLITCODEC = True
except ImportError:
USING_TRANSLITCODEC = False
_punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+') _punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
@@ -34,11 +26,5 @@ def slugify(text, delim=u'-'):
""" """
result = [] result = []
for word in _punct_re.split(text.lower()): for word in _punct_re.split(text.lower()):
if USING_TRANSLITCODEC: result.extend(unidecode(word).split())
word = word.encode('translit/long')
else:
word = normalize('NFKD', word).encode('ascii', 'ignore')
if word:
result.append(word)
return unicode(delim.join(result)) return unicode(delim.join(result))

View File

@@ -65,6 +65,7 @@ try:
'pytz', 'pytz',
'six', 'six',
'oauthlib==0.5.0', 'oauthlib==0.5.0',
'unidecode',
## Annoying. Please remove once we can! We only indirectly ## Annoying. Please remove once we can! We only indirectly
## use pbr, and currently it breaks things, presumably till ## use pbr, and currently it breaks things, presumably till