From 7525cdf9eb1dcf8e19b340072247426d0fd570c0 Mon Sep 17 00:00:00 2001 From: Sebastian Spaeth Date: Wed, 9 Jan 2013 12:36:26 +0100 Subject: [PATCH 01/10] Disallow ":" as part of a media slug We might want to use "id:IDN" as a special case slug to point to a media's id. Signed-off-by: Sebastian Spaeth --- mediagoblin/tools/url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediagoblin/tools/url.py b/mediagoblin/tools/url.py index 8604ad5f..d9179f9e 100644 --- a/mediagoblin/tools/url.py +++ b/mediagoblin/tools/url.py @@ -25,7 +25,7 @@ except ImportError: USING_TRANSLITCODEC = False -_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+') +_punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+') def slugify(text, delim=u'-'): From 4ca0755ab63192b9a79c1152673bfeb19e45e8a1 Mon Sep 17 00:00:00 2001 From: Sebastian Spaeth Date: Wed, 9 Jan 2013 12:38:08 +0100 Subject: [PATCH 02/10] Sanitize slug input on media edit Previously we allowed EVERYTHING, even slashes as slug when editing the media. Make sure we slugify the input to sanitize it. (+ string formdata is unicode, so there is no need to convert it) Signed-off-by: Sebastian Spaeth --- mediagoblin/edit/views.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mediagoblin/edit/views.py b/mediagoblin/edit/views.py index ece11df5..646a9e5b 100644 --- a/mediagoblin/edit/views.py +++ b/mediagoblin/edit/views.py @@ -32,6 +32,7 @@ from mediagoblin.tools.response import render_to_response, redirect from mediagoblin.tools.translate import pass_to_ugettext as _ from mediagoblin.tools.text import ( convert_to_tag_list_of_dicts, media_tags_as_string) +from mediagoblin.tools.url import slugify from mediagoblin.db.util import check_media_slug_used, check_collection_slug_used import mimetypes @@ -57,22 +58,20 @@ def edit_media(request, media): if request.method == 'POST' and form.validate(): # Make sure there isn't already a MediaEntry with such a slug # and userid. - slug_used = check_media_slug_used(media.uploader, request.form['slug'], - media.id) + slug = slugify(request.form['slug']) + slug_used = check_media_slug_used(media.uploader, slug, media.id) if slug_used: form.slug.errors.append( _(u'An entry with that slug already exists for this user.')) else: - media.title = unicode(request.form['title']) - media.description = unicode(request.form.get('description')) + media.title = request.form['title'] + media.description = request.form.get('description') media.tags = convert_to_tag_list_of_dicts( request.form.get('tags')) media.license = unicode(request.form.get('license', '')) or None - - media.slug = unicode(request.form['slug']) - + media.slug = slug media.save() return redirect(request, From 66d9f1b2a041eeb3dd592b807ad06c25acb746c7 Mon Sep 17 00:00:00 2001 From: Sebastian Spaeth Date: Wed, 9 Jan 2013 12:40:18 +0100 Subject: [PATCH 03/10] Make generate_slug assign a slug in any case generate_slug could assign "none" as slug. Make sure it assigns a unique slug in any case. We now try based on: a) existing slug values b) media.title c) media.id d) random garbage Signed-off-by: Sebastian Spaeth --- mediagoblin/db/mixin.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index 001b7826..daeda8ce 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -56,15 +56,22 @@ class MediaEntryMixin(object): # (db.models -> db.mixin -> db.util -> db.models) from mediagoblin.db.util import check_media_slug_used - self.slug = slugify(self.title) + #Is already a slug assigned? Check if it is valid + if self.slug: + self.slug = slugify(self.slug) + elif self.title: + #assign slug based on title + self.slug = slugify(self.title) + elif self.id: + # Does the object already have an ID? (after adding to the session) + self.slug = unicode(self.id) + else: + # Everything else failed, just use random garbage + self.slug = unicode(uuid4())[1:4] - duplicate = check_media_slug_used(self.uploader, self.slug, self.id) - - if duplicate: - if self.id is not None: - self.slug = u"%s-%s" % (self.id, self.slug) - else: - self.slug = None + while check_media_slug_used(self.uploader, self.slug, self.id): + # add garbage till it's unique + self.slug = self.slug + unicode(uuid4())[1:4] @property def description_html(self): From 72bb46c7c81983817f5060d67c65663f911c4504 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Tue, 15 Jan 2013 14:34:13 -0600 Subject: [PATCH 04/10] Need to import uuid4 for generate_slug to totally work --- mediagoblin/db/mixin.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index daeda8ce..d3d4da66 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -27,6 +27,8 @@ These functions now live here and get "mixed in" into the real objects. """ +from uuid import uuid4 + from werkzeug.utils import cached_property from mediagoblin import mg_globals From 88de830fcfb3fb02af4b2d71b82efaa8c83df665 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Tue, 15 Jan 2013 15:48:19 -0600 Subject: [PATCH 05/10] A revised algorithm for generating slugs. This one does not *force* slugs, but usually it will probably result in a niceish one. The end *result* of the algorithm will (presumably, I have not tested it) result in these resolutions for these situations: - If we have a slug, make sure it's clean and sanitized, and if it's unique, we'll use that. - If we have a title, slugify it, and if it's unique, we'll use that. - If we can't get any sort of thing that looks like it'll be a useful slug out of a title or an existing slug, bail, and don't set the slug at all. Don't try to create something just because. Make sure we have a reasonable basis for a slug first. - If we have a reasonable basis for a slug (either based on existing slug or slugified title) but it's not unique, first try appending the entry's id, if that exists - If that doesn't result in something unique, tack on some randomly generated bits until it's unique. That'll be a little bit of junk, but at least it has the *basis* of a nice slug! --- mediagoblin/db/mixin.py | 58 +++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index d3d4da66..7cb530e4 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -54,6 +54,26 @@ class UserMixin(object): class MediaEntryMixin(object): def generate_slug(self): + """ + This one does not *force* slugs, but usually it will probably result + in a niceish one. + + The end *result* of the algorithm will (presumably, I have not tested + it) result in these resolutions for these situations: + - If we have a slug, make sure it's clean and sanitized, and if it's + unique, we'll use that. + - If we have a title, slugify it, and if it's unique, we'll use that. + - If we can't get any sort of thing that looks like it'll be a useful + slug out of a title or an existing slug, bail, and don't set the + slug at all. Don't try to create something just because. Make + sure we have a reasonable basis for a slug first. + - If we have a reasonable basis for a slug (either based on existing + slug or slugified title) but it's not unique, first try appending + the entry's id, if that exists + - If that doesn't result in something unique, tack on some randomly + generated bits until it's unique. That'll be a little bit of junk, + but at least it has the basis of a nice slug. + """ # import this here due to a cyclic import issue # (db.models -> db.mixin -> db.util -> db.models) from mediagoblin.db.util import check_media_slug_used @@ -61,19 +81,35 @@ class MediaEntryMixin(object): #Is already a slug assigned? Check if it is valid if self.slug: self.slug = slugify(self.slug) + + # otherwise, try to use the title. elif self.title: - #assign slug based on title + # assign slug based on title self.slug = slugify(self.title) - elif self.id: - # Does the object already have an ID? (after adding to the session) - self.slug = unicode(self.id) - else: - # Everything else failed, just use random garbage - self.slug = unicode(uuid4())[1:4] - - while check_media_slug_used(self.uploader, self.slug, self.id): - # add garbage till it's unique - self.slug = self.slug + unicode(uuid4())[1:4] + + # Do we have anything at this point? + # If not, we're not going to get a slug + # so just return... we're not going to force one. + if not self.slug: + return # giving up! + + # Otherwise, let's see if this is unique. + if check_media_slug_used(self.uploader, self.slug, self.id): + # It looks like it's being used... lame. + + # Can we just append the object's id to the end? + if self.id: + slug_with_id = "%s-%s" % (self.slug, self.id) + if not check_media_slug_used(self.uploader, + slug_with_id, self.id): + self.slug = slug_with_id + return # success! + + # okay, still no success; + # let's whack junk on there till it's unique. + self.slug = self.slug + u'-' + while check_media_slug_used(self.uploader, self.slug, self.id): + self.slug = self.slug + unicode(uuid4())[1:4] @property def description_html(self): From 985871095e873aca1e160a7eea4ad97f4b80de18 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Tue, 15 Jan 2013 16:11:15 -0600 Subject: [PATCH 06/10] Simplifying string concatenation in generate_slug and fixing docstring - made the mistake of copying some commit message things into the docstring. Fixed. - elrond points out that += is nicer and we don't need u"" in this case since we're not concatenating a variable, we're concatenating a known ascii string. --- mediagoblin/db/mixin.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index 7cb530e4..0e000324 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -55,11 +55,13 @@ class UserMixin(object): class MediaEntryMixin(object): def generate_slug(self): """ + Generate a unique slug for this MediaEntry. + This one does not *force* slugs, but usually it will probably result in a niceish one. - The end *result* of the algorithm will (presumably, I have not tested - it) result in these resolutions for these situations: + The end *result* of the algorithm will result in these resolutions for + these situations: - If we have a slug, make sure it's clean and sanitized, and if it's unique, we'll use that. - If we have a title, slugify it, and if it's unique, we'll use that. @@ -99,7 +101,7 @@ class MediaEntryMixin(object): # Can we just append the object's id to the end? if self.id: - slug_with_id = "%s-%s" % (self.slug, self.id) + slug_with_id = u"%s-%s" % (self.slug, self.id) if not check_media_slug_used(self.uploader, slug_with_id, self.id): self.slug = slug_with_id @@ -107,9 +109,9 @@ class MediaEntryMixin(object): # okay, still no success; # let's whack junk on there till it's unique. - self.slug = self.slug + u'-' + self.slug += '-' while check_media_slug_used(self.uploader, self.slug, self.id): - self.slug = self.slug + unicode(uuid4())[1:4] + self.slug += uuid4()[1:4] @property def description_html(self): From 266b42b3dc7697801f726875e1a734b9e4ba7dc9 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Wed, 23 Jan 2013 15:15:22 -0600 Subject: [PATCH 07/10] Switching uuid4()[1:4] -> uuid4().hex[:4] .hex is what we need to access to get at the ascii (hex) version anyway. Also, not sure why the previous version grabbed starting at the index of 1... just grab the first characters instead. --- mediagoblin/db/mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index 0e000324..c4c508a4 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -111,7 +111,7 @@ class MediaEntryMixin(object): # let's whack junk on there till it's unique. self.slug += '-' while check_media_slug_used(self.uploader, self.slug, self.id): - self.slug += uuid4()[1:4] + self.slug += uuid4().hex[:4] @property def description_html(self): From b0118957ff3304226f3500b4f5cfaecc41e9ee48 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Wed, 23 Jan 2013 16:40:39 -0600 Subject: [PATCH 08/10] We don't want any empty string slugs, so make "" -> None --- mediagoblin/db/mixin.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index c4c508a4..98414d72 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -89,6 +89,10 @@ class MediaEntryMixin(object): # assign slug based on title self.slug = slugify(self.title) + # We don't want any empty string slugs + if self.slug == u"": + self.slug = None + # Do we have anything at this point? # If not, we're not going to get a slug # so just return... we're not going to force one. From 394a4a37f7f71a6182d47cf7e6fcdf4fcccddd5d Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Wed, 23 Jan 2013 16:47:30 -0600 Subject: [PATCH 09/10] require mock for the new uuid-mocking tests --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 12284f26..9c295dc4 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ setup( 'Markdown', 'sqlalchemy>=0.7.0', 'sqlalchemy-migrate', + 'mock', ## This is optional! # 'translitcodec', ## For now we're expecting that users will install this from From a81082fcaf41666841f40b823ddcef0255aebbe3 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Wed, 23 Jan 2013 16:49:54 -0600 Subject: [PATCH 10/10] New mediaentry slug tests now pass! - fixed some issues with "whacking uuid junk on the slug" - uuid4() -> uuid.uuid4() so that mock will work right - added all the tests! --- mediagoblin/db/mixin.py | 7 +- mediagoblin/tests/test_modelmethods.py | 130 +++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 3 deletions(-) create mode 100644 mediagoblin/tests/test_modelmethods.py diff --git a/mediagoblin/db/mixin.py b/mediagoblin/db/mixin.py index 98414d72..b69e7d51 100644 --- a/mediagoblin/db/mixin.py +++ b/mediagoblin/db/mixin.py @@ -27,7 +27,7 @@ These functions now live here and get "mixed in" into the real objects. """ -from uuid import uuid4 +import uuid from werkzeug.utils import cached_property @@ -113,9 +113,10 @@ class MediaEntryMixin(object): # okay, still no success; # let's whack junk on there till it's unique. - self.slug += '-' + self.slug += '-' + uuid.uuid4().hex[:4] + # keep going if necessary! while check_media_slug_used(self.uploader, self.slug, self.id): - self.slug += uuid4().hex[:4] + self.slug += uuid.uuid4().hex[:4] @property def description_html(self): diff --git a/mediagoblin/tests/test_modelmethods.py b/mediagoblin/tests/test_modelmethods.py new file mode 100644 index 00000000..4f2df19b --- /dev/null +++ b/mediagoblin/tests/test_modelmethods.py @@ -0,0 +1,130 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +# Maybe not every model needs a test, but some models have special +# methods, and so it makes sense to test them here. + + +from mediagoblin.db.models import MediaEntry + +from mediagoblin.tests.tools import get_test_app, \ + fixture_add_user + +import mock + + +class FakeUUID(object): + hex = 'testtest-test-test-test-testtesttest' + +UUID_MOCK = mock.Mock(return_value=FakeUUID()) + + +class TestMediaEntrySlugs(object): + def setUp(self): + self.test_app = get_test_app(dump_old_app=True) + self.chris_user = fixture_add_user(u'chris') + self.emily_user = fixture_add_user(u'emily') + self.existing_entry = self._insert_media_entry_fixture( + title=u"Beware, I exist!", + slug=u"beware-i-exist") + + def _insert_media_entry_fixture(self, title=None, slug=None, this_id=None, + uploader=None, save=True): + entry = MediaEntry() + entry.title = title or u"Some title" + entry.slug = slug + entry.id = this_id + entry.uploader = uploader or self.chris_user.id + entry.media_type = u'image' + + if save: + entry.save() + + return entry + + def test_unique_slug_from_title(self): + entry = self._insert_media_entry_fixture(u"Totally unique slug!", save=False) + entry.generate_slug() + assert entry.slug == u'totally-unique-slug' + + def test_old_good_unique_slug(self): + entry = self._insert_media_entry_fixture( + u"A title here", u"a-different-slug-there", save=False) + entry.generate_slug() + assert entry.slug == u"a-different-slug-there" + + def test_old_weird_slug(self): + entry = self._insert_media_entry_fixture( + slug=u"wowee!!!!!", save=False) + entry.generate_slug() + assert entry.slug == u"wowee" + + def test_existing_slug_use_id(self): + entry = self._insert_media_entry_fixture( + u"Beware, I exist!!", this_id=9000, save=False) + entry.generate_slug() + assert entry.slug == u"beware-i-exist-9000" + + @mock.patch('uuid.uuid4', UUID_MOCK) + def test_existing_slug_cant_use_id(self): + # This one grabs the nine thousand slug + self._insert_media_entry_fixture( + slug=u"beware-i-exist-9000") + + entry = self._insert_media_entry_fixture( + u"Beware, I exist!!", this_id=9000, save=False) + entry.generate_slug() + assert entry.slug == u"beware-i-exist-test" + + @mock.patch('uuid.uuid4', UUID_MOCK) + def test_existing_slug_cant_use_id_extra_junk(self): + # This one grabs the nine thousand slug + self._insert_media_entry_fixture( + slug=u"beware-i-exist-9000") + + # This one grabs makes sure the annoyance doesn't stop + self._insert_media_entry_fixture( + slug=u"beware-i-exist-test") + + entry = self._insert_media_entry_fixture( + u"Beware, I exist!!", this_id=9000, save=False) + entry.generate_slug() + assert entry.slug == u"beware-i-exist-testtest" + + def test_garbage_slug(self): + """ + Titles that sound totally like Q*Bert shouldn't have slugs at + all. We'll just reference them by id. + + , + / \ (@!#?@!) + |\,/| ,-, / + | |#| ( ")~ + / \|/ \ L L + |\,/|\,/| + | |#, |#| + / \|/ \|/ \ + |\,/|\,/|\,/| + | |#| |#| |#| + / \|/ \|/ \|/ \ + |\,/|\,/|\,/|\,/| + | |#| |#| |#| |#| + \|/ \|/ \|/ \|/ + """ + qbert_entry = self._insert_media_entry_fixture( + u"@!#?@!", save=False) + qbert_entry.generate_slug() + assert qbert_entry.slug is None