A super strict HTML cleaner method with mediocre tests.
This commit is contained in:
parent
1e85d28e01
commit
a68ee5556e
@ -103,3 +103,22 @@ def test_locale_to_lower_lower():
|
|||||||
# crazy renditions. Useful?
|
# crazy renditions. Useful?
|
||||||
assert util.locale_to_lower_lower('en-US') == 'en-us'
|
assert util.locale_to_lower_lower('en-US') == 'en-us'
|
||||||
assert util.locale_to_lower_lower('en_us') == 'en-us'
|
assert util.locale_to_lower_lower('en_us') == 'en-us'
|
||||||
|
|
||||||
|
|
||||||
|
def test_html_cleaner():
    """Exercise util.clean_html on markup containing an image and a
    javascript: link."""
    # Remove images
    markup_with_image = (
        '<p>Hi everybody! '
        '<img src="http://example.org/huge-purple-barney.png" /></p>\n'
        '<p>:)</p>')
    # The input has two top-level paragraphs; the expected output has them
    # wrapped in a single <div>.
    expected_without_image = (
        '<div>'
        '<p>Hi everybody! </p>\n'
        '<p>:)</p>'
        '</div>')
    cleaned = util.clean_html(markup_with_image)
    assert cleaned == expected_without_image

    # Remove evil javascript
    markup_with_js_link = (
        '<p><a href="javascript:nasty_surprise">innocent link!</a></p>')
    # The link itself survives, but its href is expected to be emptied.
    expected_defanged_link = (
        '<p><a href="">innocent link!</a></p>')
    cleaned = util.clean_html(markup_with_js_link)
    assert cleaned == expected_defanged_link
|
||||||
|
@ -30,6 +30,7 @@ import jinja2
|
|||||||
import translitcodec
|
import translitcodec
|
||||||
from paste.deploy.loadwsgi import NicerConfigParser
|
from paste.deploy.loadwsgi import NicerConfigParser
|
||||||
from webob import Response, exc
|
from webob import Response, exc
|
||||||
|
from lxml.html.clean import Cleaner
|
||||||
|
|
||||||
from mediagoblin import mg_globals
|
from mediagoblin import mg_globals
|
||||||
from mediagoblin.db.util import ObjectId
|
from mediagoblin.db.util import ObjectId
|
||||||
@ -373,6 +374,32 @@ def read_config_file(conf_file):
|
|||||||
return mgoblin_conf
|
return mgoblin_conf
|
||||||
|
|
||||||
|
|
||||||
|
# A super strict version of the lxml.html cleaner class: every removal
# option is switched on and only a small set of formatting tags is allowed.
HTML_CLEANER = Cleaner(
    # Things to strip out entirely.
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    # The only tags allowed to survive cleaning.
    allow_tags=[
        'div',
        'b', 'i', 'em', 'strong',
        'p', 'ul', 'ol', 'li',
        'a', 'br',
    ],
    remove_unknown_tags=False,  # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True,  # for now
    # No hosts and no tags are whitelisted for embedded content.
    host_whitelist=(),
    whitelist_tags=set())
|
||||||
|
|
||||||
|
|
||||||
|
def clean_html(html):
    """
    Sanitize an HTML fragment with the module-level HTML_CLEANER.

    Args:
     - html: the markup to clean, as a string.

    Returns:
      The cleaned markup as produced by HTML_CLEANER.clean_html(), or an
      empty string when ``html`` is empty or None.
    """
    # lxml cannot parse an empty document and raises instead of returning
    # an empty result, so short-circuit before handing it to the cleaner.
    if not html:
        return u''

    return HTML_CLEANER.clean_html(html)
|
||||||
|
|
||||||
|
|
||||||
SETUP_GETTEXTS = {}
|
SETUP_GETTEXTS = {}
|
||||||
|
|
||||||
def setup_gettext(locale):
|
def setup_gettext(locale):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user