From 434f066c8e6686dc3da3ed187fb82f59e7995b39 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sat, 4 May 2013 22:34:53 +0100 Subject: [PATCH 1/3] Fix #37: Preserve order of attributes on serialization. This doesn't do anything about the fact that none of our treebuilders preserve attribute order: it merely avoids the serializer reordering them from the order it receives them in. This changes the serializer tests to use an OrderedDict to get alphabetical order so they continue to meet their expectations. --- html5lib/serializer/htmlserializer.py | 2 +- html5lib/tests/test_serializer.py | 14 +++++++++++--- requirements-test.txt | 1 + tox.ini | 12 ++++++++++++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py index 18344aed..1f7cace1 100644 --- a/html5lib/serializer/htmlserializer.py +++ b/html5lib/serializer/htmlserializer.py @@ -226,7 +226,7 @@ def serialize(self, treewalker, encoding=None): in_cdata = True elif in_cdata: self.serializeError(_("Unexpected child element of a CDATA element")) - for (attr_namespace, attr_name), attr_value in sorted(token["data"].items()): + for (attr_namespace, attr_name), attr_value in token["data"].items(): # TODO: Add namespace support here k = attr_name v = attr_value diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index adebf609..11b44540 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -3,6 +3,12 @@ import unittest from .support import get_data_files +try: + from collections import OrderedDict +except ImportError: + # Python 2.6 support + from ordereddict import OrderedDict + try: import json except ImportError: @@ -73,9 +79,11 @@ def _convertAttrib(self, attribs): """html5lib tree-walkers use a dict of (namespace, name): value for attributes, but JSON cannot represent this. Convert from the format in the serializer tests (a list of dicts with "namespace", "name", - and "value" as keys) to html5lib's tree-walker format.""" - attrs = {} - for attrib in attribs: + and "value" as keys) to html5lib's tree-walker format. Tests expect + attributes to be ordered alphabetically, so use an OrderedDict to + ensure this.""" + attrs = OrderedDict() + for attrib in sorted(attribs, key=lambda x: (x["namespace"], x["name"])): name = (attrib["namespace"], attrib["name"]) assert(name not in attrs) attrs[name] = attrib["value"] diff --git a/requirements-test.txt b/requirements-test.txt index f3c7e8e6..06d9dbca 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1 +1,2 @@ nose +ordereddict # Python 2.6 diff --git a/tox.ini b/tox.ini index 724efa6c..6a81b7db 100644 --- a/tox.ini +++ b/tox.ini @@ -11,3 +11,15 @@ deps = six commands = {envbindir}/nosetests + +[testenv:py26] +basepython = python2.6 +deps = + charade + datrie + Genshi + lxml + ordereddict + nose + six + From 692fa544e0523566e16c69ae7fc8604a59a5efcb Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sat, 4 May 2013 23:30:48 +0100 Subject: [PATCH 2/3] fixup! Fix #37: Preserve order of attributes on serialization. --- tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/tox.ini b/tox.ini index 6a81b7db..bdbc0adf 100644 --- a/tox.ini +++ b/tox.ini @@ -22,4 +22,3 @@ deps = ordereddict nose six - From b1fafb86546054f22ef57acba65b3fe5ea8c82ca Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Sun, 5 May 2013 13:24:47 +0100 Subject: [PATCH 3/3] Move reordering attributes to a filter. --- .travis.yml | 1 + README.rst | 8 +++++++- html5lib/filters/alphabeticalattributes.py | 20 ++++++++++++++++++++ html5lib/tests/test_serializer.py | 18 ++++++------------ requirements-optional-2.6.txt | 3 +++ 5 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 html5lib/filters/alphabeticalattributes.py create mode 100644 requirements-optional-2.6.txt diff --git a/.travis.yml b/.travis.yml index 97d8cedf..65de456e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,7 @@ before_install: install: - pip install -r requirements.txt -r requirements-test.txt --use-mirrors - if [[ $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional.txt --use-mirrors; fi + - if [[ $TRAVIS_PYTHON_VERSION == "2.6" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-2.6.txt --use-mirrors; fi - if [[ $TRAVIS_PYTHON_VERSION != "pypy" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-cpython.txt --use-mirrors; fi - if [[ $FLAKE == "true" ]]; then pip install --use-mirrors flake8; fi diff --git a/README.rst b/README.rst index f38b9426..f75a9fd5 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,10 @@ Optionally: be determined; ``chardet``, from which it was forked, can also be used on Python 2. +- ``ordereddict`` can be used under Python 2.6 + (``collections.OrderedDict`` is used instead on later versions) to + serialize attributes in alphabetical order. + Installation ------------ @@ -78,7 +82,9 @@ release tarballs this is unneeded):: $ git submodule update And then they can be run, with ``nose`` installed, using the -``nosetests`` command in the root directory. All should pass. +``nosetests`` command in the root directory. Note that ``ordereddict`` +is required for the serializer tests under Python 2.6. All should +pass. If you have all compatible Python implementations available on your system, you can run tests on all of them by using tox:: diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 00000000..fed6996c --- /dev/null +++ b/html5lib/filters/alphabeticalattributes.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base + +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + + +class Filter(_base.Filter): + def __iter__(self): + for token in _base.Filter.__iter__(self): + if token["type"] in ("StartTag", "EmptyTag"): + attrs = OrderedDict() + for name, value in sorted(token["data"].items(), + key=lambda x: x[0]): + attrs[name] = value + token["data"] = attrs + yield token diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index 11b44540..93dd0e11 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -3,12 +3,6 @@ import unittest from .support import get_data_files -try: - from collections import OrderedDict -except ImportError: - # Python 2.6 support - from ordereddict import OrderedDict - try: import json except ImportError: @@ -21,6 +15,7 @@ import html5lib from html5lib import serializer, constants +from html5lib.filters.alphabeticalattributes import Filter as AlphabeticalAttributesFilter from html5lib.treewalkers._base import TreeWalker optionals_loaded = [] @@ -79,11 +74,9 @@ def _convertAttrib(self, attribs): """html5lib tree-walkers use a dict of (namespace, name): value for attributes, but JSON cannot represent this. Convert from the format in the serializer tests (a list of dicts with "namespace", "name", - and "value" as keys) to html5lib's tree-walker format. Tests expect - attributes to be ordered alphabetically, so use an OrderedDict to - ensure this.""" - attrs = OrderedDict() - for attrib in sorted(attribs, key=lambda x: (x["namespace"], x["name"])): + and "value" as keys) to html5lib's tree-walker format.""" + attrs = {} + for attrib in attribs: name = (attrib["namespace"], attrib["name"]) assert(name not in attrs) attrs[name] = attrib["value"] @@ -92,7 +85,8 @@ def _convertAttrib(self, attribs): def serialize_html(input, options): options = dict([(str(k), v) for k, v in options.items()]) - return serializer.HTMLSerializer(**options).render(JsonWalker(input), options.get("encoding", None)) + stream = AlphabeticalAttributesFilter(JsonWalker(input)) + return serializer.HTMLSerializer(**options).render(stream, options.get("encoding", None)) def runSerializerTest(input, expected, options): diff --git a/requirements-optional-2.6.txt b/requirements-optional-2.6.txt new file mode 100644 index 00000000..d1e6a805 --- /dev/null +++ b/requirements-optional-2.6.txt @@ -0,0 +1,3 @@ +# Can be used to force attributes to be serialized in alphabetical +# order. +ordereddict