From 0f99d1742ef412781fd9737915e1cd1737186dec Mon Sep 17 00:00:00 2001 From: Sven Knebel Date: Wed, 12 Sep 2018 21:55:29 +0200 Subject: [PATCH 001/123] longitude/latitude was removed from hentry backcompat long ago http://microformats.org/wiki/index.php?title=h-entry&diff=next&oldid=60791 --- mf2py/backcompat-rules/hentry.json | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mf2py/backcompat-rules/hentry.json b/mf2py/backcompat-rules/hentry.json index aa7b8be..4eaa52d 100644 --- a/mf2py/backcompat-rules/hentry.json +++ b/mf2py/backcompat-rules/hentry.json @@ -11,10 +11,7 @@ ], "published": [ "dt-published" - ], - "latitude": [ - "p-latitude" - ], + ], "entry-content": [ "e-content" ], @@ -31,9 +28,6 @@ ], "updated": [ "dt-updated" - ], - "longitude": [ - "p-longitude" ] }, "rels": { From ed9dc761b6557ca63cc5ef417c177c297282efe3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" Date: Mon, 1 Oct 2018 16:05:46 +0000 Subject: [PATCH 002/123] Bump requests from 2.18.4 to 2.19.1 Bumps [requests](https://github.com/requests/requests) from 2.18.4 to 2.19.1. - [Release notes](https://github.com/requests/requests/releases) - [Changelog](https://github.com/requests/requests/blob/master/HISTORY.md) - [Commits](https://github.com/requests/requests/compare/v2.18.4...v2.19.1) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 85b8953..134db66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ html5lib==1.0.1 nose==1.3.0 mock==1.3.0 lxml==4.2.4 -requests==2.18.4 +requests==2.19.1 BeautifulSoup4==4.6.0 -e . From 96d108ff8891bcf16cd46f36ef01221a7b754462 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" Date: Mon, 1 Oct 2018 16:12:37 +0000 Subject: [PATCH 003/123] Bump beautifulsoup4 from 4.6.0 to 4.6.3 Bumps [beautifulsoup4](http://www.crummy.com/software/BeautifulSoup/bs4/) from 4.6.0 to 4.6.3. Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 134db66..45378b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,5 @@ nose==1.3.0 mock==1.3.0 lxml==4.2.4 requests==2.19.1 -BeautifulSoup4==4.6.0 +BeautifulSoup4==4.6.3 -e . From f4ad1ec45492e5256d7830f05025364b5691f3db Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" Date: Mon, 1 Oct 2018 16:21:40 +0000 Subject: [PATCH 004/123] Bump lxml from 4.2.4 to 4.2.5 Bumps [lxml](https://github.com/lxml/lxml) from 4.2.4 to 4.2.5. - [Release notes](https://github.com/lxml/lxml/releases) - [Changelog](https://github.com/lxml/lxml/blob/master/CHANGES.txt) - [Commits](https://github.com/lxml/lxml/compare/lxml-4.2.4...lxml-4.2.5) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 45378b3..d063c45 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ html5lib==1.0.1 nose==1.3.0 mock==1.3.0 -lxml==4.2.4 +lxml==4.2.5 requests==2.19.1 BeautifulSoup4==4.6.3 -e . From a51bd81b488fc55de09cbbba4a8d46877cd543be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" Date: Mon, 1 Oct 2018 16:32:12 +0000 Subject: [PATCH 005/123] Bump mock from 1.3.0 to 2.0.0 Bumps [mock](https://github.com/testing-cabal/mock) from 1.3.0 to 2.0.0. - [Release notes](https://github.com/testing-cabal/mock/releases) - [Changelog](https://github.com/testing-cabal/mock/blob/master/NEWS) - [Commits](https://github.com/testing-cabal/mock/compare/1.3.0...2.0.0) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d063c45..b9c89b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Keep in sync with setup.py's install_requires! html5lib==1.0.1 nose==1.3.0 -mock==1.3.0 +mock==2.0.0 lxml==4.2.5 requests==2.19.1 BeautifulSoup4==4.6.3 From 1121e30b0db24c2aac46208de38e7709ca74830c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" Date: Mon, 1 Oct 2018 16:36:53 +0000 Subject: [PATCH 006/123] Bump nose from 1.3.0 to 1.3.7 Bumps [nose](https://github.com/nose-devs/nose) from 1.3.0 to 1.3.7. - [Release notes](https://github.com/nose-devs/nose/releases) - [Changelog](https://github.com/nose-devs/nose/blob/master/CHANGELOG) - [Commits](https://github.com/nose-devs/nose/compare/release_1.3.0...release_1.3.7) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b9c89b4..d9b375d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # Keep in sync with setup.py's install_requires! html5lib==1.0.1 -nose==1.3.0 +nose==1.3.7 mock==2.0.0 lxml==4.2.5 requests==2.19.1 From 3a534cd3c3daf13325d8e5c0c756c4b33fa580c8 Mon Sep 17 00:00:00 2001 From: Sven Knebel Date: Tue, 2 Oct 2018 18:40:58 +0200 Subject: [PATCH 007/123] Fix #135: reduce cases of implied photo according to spec change --- mf2py/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mf2py/parser.py b/mf2py/parser.py index 1cf07b1..d28bbac 100644 --- a/mf2py/parser.py +++ b/mf2py/parser.py @@ -192,7 +192,7 @@ def handle_microformat(root_class_names, el, value_property=None, if "name" not in properties and parsed_types_aggregation.isdisjoint("peh"): properties["name"] = [implied_properties.name(el, base_url=self.__url__)] - if "photo" not in properties: + if "photo" not in properties and parsed_types_aggregation.isdisjoint("uh"): x = implied_properties.photo(el, self.dict_class, self.__img_with_alt__, base_url=self.__url__) if x is not None: properties["photo"] = [x] From 919cb25a4d01febe613c6e711f58a1336442da6d Mon Sep 17 00:00:00 2001 From: Sven Knebel Date: Wed, 17 Oct 2018 20:41:55 +0200 Subject: [PATCH 008/123] resolve relative URLs always, as per upcoming spec change. Also add a test case covering the various options for u- properties. --- mf2py/parse_property.py | 15 +++++------ test/examples/u_all_cases.html | 48 ++++++++++++++++++++++++++++++++++ test/test_parser.py | 12 ++++++++- 3 files changed, 65 insertions(+), 10 deletions(-) create mode 100644 test/examples/u_all_cases.html diff --git a/mf2py/parse_property.py b/mf2py/parse_property.py index 2d72259..21731b1 100644 --- a/mf2py/parse_property.py +++ b/mf2py/parse_property.py @@ -49,21 +49,18 @@ def url(el, dict_class, img_with_alt, base_url=''): if prop_value is None: prop_value = get_attr(el, "data", check_name="object") - if prop_value is not None: - return try_urljoin(base_url, prop_value) - - # handle value-class-pattern - prop_value = value_class_pattern.text(el) - if prop_value is not None: - return prop_value + if prop_value is None: + # handle value-class-pattern + prop_value = value_class_pattern.text(el) - prop_value = get_attr(el, "title", check_name="abbr") + if prop_value is None: + prop_value = get_attr(el, "title", check_name="abbr") if prop_value is None: prop_value = get_attr(el, "value", check_name=("data", "input")) if prop_value is None: prop_value = get_textContent(el) - return prop_value + return try_urljoin(base_url, prop_value) def datetime(el, default_date=None): diff --git a/test/examples/u_all_cases.html b/test/examples/u_all_cases.html new file mode 100644 index 0000000..dedd056 --- /dev/null +++ b/test/examples/u_all_cases.html @@ -0,0 +1,48 @@ + + + + Hello World + + + +
+

Testing variations of u-properties

+
    + +
  • +
  • +
  • +
  • +
  • +
  • + +
  • +
  • +
  • +
  • +
  • +
  • + +
  • +
  • + +
  • +
  • + + + +
  • CHEESE
  • +
  • CHEESE
  • + +
  • +
  • +
  • +
  • + +
  • /test

  • +
  • http://example.com/test

  • +
  • /test

  • +
  • /test

  • +
+
+ diff --git a/test/test_parser.py b/test/test_parser.py index 505a4be..3283b3f 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -905,4 +905,14 @@ def test_input_tree_integrity(): def make_labelled_cmp(label): f = lambda html1, html2: assert_equal(html1,html2) f.description = label - return f \ No newline at end of file + return f + +def test_all_u_cases(): + """ test variations of u- parsing and that relative urls are always resolved """ + + URL_COUNT = 28 + result = parse_fixture("u_all_cases.html") + + assert_equal(URL_COUNT, len(result['items'][0]['properties']['url'])) + for i in range(URL_COUNT): + yield make_labelled_cmp("all_u_cases_" + str(i)), "http://example.com/test", result['items'][0]['properties']['url'][i] From 7dd6cd5d9d25ebde6f73e7eac623788385cbc462 Mon Sep 17 00:00:00 2001 From: Sven Knebel Date: Sun, 18 Nov 2018 18:07:34 +0100 Subject: [PATCH 009/123] VCP now handles tz offsets without leading zeros --- mf2py/datetime_helpers.py | 2 +- test/examples/datetimes.html | 33 +++++++++++++++++++++++++++++++++ test/test_parser.py | 10 +++++++++- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/mf2py/datetime_helpers.py b/mf2py/datetime_helpers.py index 02b5321..95e1093 100644 --- a/mf2py/datetime_helpers.py +++ b/mf2py/datetime_helpers.py @@ -10,7 +10,7 @@ SEC_RE = r'(:(?P\d{2})(\.\d+)?)' RAWTIME_RE = r'(?P\d{1,2})(:(?P\d{2})%s?)?' % (SEC_RE) AMPM_RE = r'am|pm|a\.m\.|p\.m\.|AM|PM|A\.M\.|P\.M\.' -TIMEZONE_RE = r'Z|[+-]\d{2}:?\d{2}?' +TIMEZONE_RE = r'Z|[+-]\d{1,2}:?\d{2}?' TIME_RE = (r'(?P%s)( ?(?P%s))?( ?(?P%s))?' % (RAWTIME_RE, AMPM_RE, TIMEZONE_RE)) DATETIME_RE = (r'(?P%s)(?P[T ])(?P