Skip to content

Commit b68f188

Browse files
committed
Fixed #4310 -- Fixed a regular expression bug in strip_entities function and added tests for several django.utils.html functions. Based on patch from Brian Harring.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@5701 bcc190cf-cafb-0310-a4f2-bffc1f526a37
1 parent eff675b commit b68f188

File tree

4 files changed

+117
-1
lines changed

4 files changed

+117
-1
lines changed

django/utils/html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def strip_spaces_between_tags(value):
5050

5151
def strip_entities(value):
5252
"Returns the given HTML with all entities (&something;) stripped"
53-
return re.sub(r'&(?:\w+|#\d);', '', force_unicode(value))
53+
return re.sub(r'&(?:\w+|#\d+);', '', force_unicode(value))
5454
strip_entities = allow_lazy(strip_entities, unicode)
5555

5656
def fix_ampersands(value):

tests/regressiontests/utils/__init__.py

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Test runner needs a models.py file.
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""
2+
Tests for django.utils.
3+
"""
4+
5+
from unittest import TestCase
6+
7+
from django.utils import html
8+
9+
class TestUtilsHtml(TestCase):
    """
    Tests for the string helpers exposed by the ``html`` module imported
    from ``django.utils``: escape, linebreaks, strip_tags,
    strip_spaces_between_tags, strip_entities and fix_ampersands.
    """

    def check_output(self, function, value, output=None):
        """
        Check that function(value) equals output. If output is None,
        check that function(value) equals value.
        """
        if output is None:
            output = value
        self.assertEqual(function(value), output)

    def test_escape(self):
        """
        Each HTML-special character is replaced by its entity, wherever it
        appears in the string and however often it is repeated.
        """
        f = html.escape
        items = (
            # '&' has to come out as '&amp;'; this agrees with the '<&'
            # check at the end of this test.
            ('&', '&amp;'),
            ('<', '&lt;'),
            ('>', '&gt;'),
            ('"', '&quot;'),
            ("'", '&#39;'),
        )
        # Substitution patterns for testing the above items.
        patterns = ("%s", "asdf%sfdsa", "%s1", "1%sb")
        for value, output in items:
            for pattern in patterns:
                self.check_output(f, pattern % value, pattern % output)
            # Check repeated values.
            self.check_output(f, value * 2, output * 2)
        # Verify it doesn't double replace &.
        self.check_output(f, '<&', '&lt;&amp;')

    def test_linebreaks(self):
        """
        Text separated by a doubled line break becomes separate <p> blocks
        and a single line break becomes <br />; the inputs use "\\n", "\\r"
        and "\\r\\n" as line breaks, and a tab is passed through unchanged.
        """
        f = html.linebreaks
        items = (
            ("para1\n\npara2\r\rpara3", "<p>para1</p>\n\n<p>para2</p>\n\n<p>para3</p>"),
            ("para1\nsub1\rsub2\n\npara2", "<p>para1<br />sub1<br />sub2</p>\n\n<p>para2</p>"),
            ("para1\r\n\r\npara2\rsub1\r\rpara4", "<p>para1</p>\n\n<p>para2<br />sub1</p>\n\n<p>para4</p>"),
            ("para1\tmore\n\npara2", "<p>para1\tmore</p>\n\n<p>para2</p>"),
        )
        for value, output in items:
            self.check_output(f, value, output)

    def test_strip_tags(self):
        """
        Complete tags (opening or closing) are removed; an unterminated
        "<..." fragment is left as it is.
        """
        f = html.strip_tags
        items = (
            ('<adf>a', 'a'),
            ('</adf>a', 'a'),
            ('<asdf><asdf>e', 'e'),
            ('<f', '<f'),
            ('</fe', '</fe'),
            ('<x>b<y>', 'b'),
        )
        for value, output in items:
            self.check_output(f, value, output)

    def test_strip_spaces_between_tags(self):
        """
        Whitespace is removed only when it sits directly between two tags;
        whitespace next to text or at the ends of the string is kept.
        """
        f = html.strip_spaces_between_tags
        # Strings that should come out untouched.
        items = (' <adf>', '<adf> ', ' </adf> ', ' <f> x</f>')
        for value in items:
            self.check_output(f, value)
        # Strings that have spaces to strip.
        items = (
            ('<d> </d>', '<d></d>'),
            ('<p>hello </p>\n<p> world</p>', '<p>hello </p><p> world</p>'),
            ('\n<p>\t</p>\n<p> </p>\n', '\n<p></p><p></p>\n'),
        )
        for value, output in items:
            self.check_output(f, value, output)

    def test_strip_entities(self):
        """
        Well-formed entities (named, or numeric with one or more digits)
        are removed; a bare "&" or an unterminated "&..." is left alone.
        """
        f = html.strip_entities
        # Strings that should come out untouched.
        values = ("&", "&a", "&a", "a&#a")
        for value in values:
            self.check_output(f, value)
        # Valid entities that should be stripped from the patterns.
        entities = ("&#1;", "&#12;", "&a;", "&fdasdfasdfasdf;")
        patterns = (
            ("asdf %(entity)s ", "asdf  "),
            ("%(entity)s%(entity)s", ""),
            ("&%(entity)s%(entity)s", "&"),
            ("%(entity)s3", "3"),
        )
        for entity in entities:
            for in_pattern, output in patterns:
                self.check_output(f, in_pattern % {'entity': entity}, output)

    def test_fix_ampersands(self):
        """
        A bare "&" is rewritten to "&amp;", while an "&" that already
        starts a well-formed entity is left untouched.
        """
        f = html.fix_ampersands
        # Strings without ampersands or with ampersands already encoded.
        values = ("a&#1;", "b", "&a;", "&amp; &x; ", "asdf")
        patterns = (
            ("%s", "%s"),
            ("&%s", "&amp;%s"),
            ("&%s&", "&amp;%s&amp;"),
        )
        for value in values:
            for in_pattern, out_pattern in patterns:
                self.check_output(f, in_pattern % value, out_pattern % value)
        # Strings with ampersands that need encoding.
        items = (
            ("&#;", "&amp;#;"),
            ("&#875 ;", "&amp;#875 ;"),
            ("&#4abc;", "&amp;#4abc;"),
        )
        for value, output in items:
            self.check_output(f, value, output)

0 commit comments

Comments
 (0)