Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions extruct/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
"""
JSON-LD extractor
"""

import jstyleson
import json
Comment thread
Gallaecio marked this conversation as resolved.
import re

import lxml.etree
Comment thread
Gallaecio marked this conversation as resolved.
Outdated

from extruct.utils import parse_html

HTML_OR_JS_COMMENTLINE = re.compile(r'^\s*(//.*|<!--.*-->)')


class JsonLdExtractor(object):
Expand All @@ -34,8 +33,7 @@ def _extract_items(self, node):
data = json.loads(script, strict=False)
except ValueError:
# sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
data = json.loads(
HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
data = jstyleson.loads(script, strict=False)
if isinstance(data, list):
return data
elif isinstance(data, dict):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ mf2py>=1.1.0
six>=1.11
w3lib
html-text
jstyleson