added new slugify with unidecode
This commit is contained in:
parent
1e80c976e2
commit
2636dddfa6
@ -77,6 +77,12 @@ def test_slugify():
|
|||||||
assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park'
|
assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park'
|
||||||
assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc'
|
assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc'
|
||||||
assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef'
|
assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef'
|
||||||
|
# Russian
|
||||||
|
assert url.slugify(u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430 '
|
||||||
|
u'\u0432 \u043f\u0430\u0440\u043a\u0435') == u'progulka-v-parke'
|
||||||
|
# Korean
|
||||||
|
assert (url.slugify(u'\uacf5\uc6d0\uc5d0\uc11c \uc0b0\ucc45') ==
|
||||||
|
u'gongweoneseo-sancaeg')
|
||||||
|
|
||||||
def test_locale_to_lower_upper():
|
def test_locale_to_lower_upper():
|
||||||
"""
|
"""
|
||||||
|
@ -15,15 +15,7 @@
|
|||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
# This import *is* used; see word.encode('translit/long') below.
|
from unidecode import unidecode
|
||||||
from unicodedata import normalize
|
|
||||||
|
|
||||||
try:
|
|
||||||
import translitcodec
|
|
||||||
USING_TRANSLITCODEC = True
|
|
||||||
except ImportError:
|
|
||||||
USING_TRANSLITCODEC = False
|
|
||||||
|
|
||||||
|
|
||||||
_punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
|
_punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
|
||||||
|
|
||||||
@ -34,11 +26,5 @@ def slugify(text, delim=u'-'):
|
|||||||
"""
|
"""
|
||||||
result = []
|
result = []
|
||||||
for word in _punct_re.split(text.lower()):
|
for word in _punct_re.split(text.lower()):
|
||||||
if USING_TRANSLITCODEC:
|
result.extend(unidecode(word).split())
|
||||||
word = word.encode('translit/long')
|
|
||||||
else:
|
|
||||||
word = normalize('NFKD', word).encode('ascii', 'ignore')
|
|
||||||
|
|
||||||
if word:
|
|
||||||
result.append(word)
|
|
||||||
return unicode(delim.join(result))
|
return unicode(delim.join(result))
|
||||||
|
1
setup.py
1
setup.py
@ -65,6 +65,7 @@ try:
|
|||||||
'pytz',
|
'pytz',
|
||||||
'six',
|
'six',
|
||||||
'oauthlib==0.5.0',
|
'oauthlib==0.5.0',
|
||||||
|
'unidecode',
|
||||||
|
|
||||||
## Annoying. Please remove once we can! We only indirectly
|
## Annoying. Please remove once we can! We only indirectly
|
||||||
## use pbr, and currently it breaks things, presumably till
|
## use pbr, and currently it breaks things, presumably till
|
||||||
|
Loading…
x
Reference in New Issue
Block a user