-
Notifications
You must be signed in to change notification settings - Fork 136
Expand file tree
/
Copy pathrequests.py
More file actions
108 lines (95 loc) · 4.23 KB
/
requests.py
File metadata and controls
108 lines (95 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Remote document loader using Requests.
.. module:: jsonld.documentloader.requests
:synopsis: Remote document loader using Requests
.. moduleauthor:: Dave Longley
.. moduleauthor:: Mike Johnson
.. moduleauthor:: Tim McNamara <tim.mcnamara@okfn.org>
.. moduleauthor:: Olaf Conradi <olaf@conradi.org>
"""
import re
import string
import urllib.parse as urllib_parse
from pyld import iri_resolver
from pyld.jsonld import JsonLdError, parse_link_header, LINK_HEADER_REL
def requests_document_loader(secure=False, **kwargs):
    """
    Create a Requests document loader.

    Can be used to setup extra Requests args such as verify, cert, timeout,
    or others. No timeout is set by this function itself; pass ``timeout``
    through ``kwargs`` if one is needed.

    :param secure: require all requests to use HTTPS (default: False).
    :param **kwargs: extra keyword args for Requests get() call.

    :return: the RemoteDocument loader function.
    """
    import requests

    def loader(url, options=None):
        """
        Retrieves JSON-LD at the given URL.

        :param url: the URL to retrieve.
        :param options: optional dict; only its ``'headers'`` entry is read
          here. When absent, an Accept header preferring JSON-LD is sent.

        :return: the RemoteDocument, a dict with the keys ``contentType``,
          ``contextUrl``, ``documentUrl`` and ``document``.

        :raises JsonLdError: if the URL is rejected by validation, if more
          than one context Link header is present, or if retrieving or
          decoding the document fails for any other reason.
        """
        # A None default avoids sharing one mutable dict between calls.
        if options is None:
            options = {}
        try:
            # validate URL
            pieces = urllib_parse.urlparse(url)
            # NOTE(review): `>` is a strict-superset test, so the netloc
            # clause below only fires when netloc contains *every* allowed
            # character plus at least one more. It was presumably meant to
            # reject characters outside the allowed set. Left as-is because
            # tightening it would reject netlocs that currently load (for
            # example ones with userinfo or bracketed IPv6 literals).
            if (not all([pieces.scheme, pieces.netloc]) or
                    pieces.scheme not in ['http', 'https'] or
                    set(pieces.netloc) > set(
                        string.ascii_letters + string.digits + '-.:')):
                raise JsonLdError(
                    'URL could not be dereferenced; only "http" and "https" '
                    'URLs are supported.',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')
            if secure and pieces.scheme != 'https':
                raise JsonLdError(
                    'URL could not be dereferenced; secure mode enabled and '
                    'the URL\'s scheme is not "https".',
                    'jsonld.InvalidUrl', {'url': url},
                    code='loading document failed')

            headers = options.get('headers')
            if headers is None:
                headers = {
                    'Accept': 'application/ld+json, application/json'
                }
            response = requests.get(url, headers=headers, **kwargs)

            content_type = response.headers.get('content-type')
            if not content_type:
                content_type = 'application/octet-stream'
            # Compare on the bare media type so that parameters such as
            # "; charset=utf-8" do not defeat the checks below. The
            # reported contentType keeps the header value unchanged.
            media_type = _media_type(content_type)

            doc = {
                'contentType': content_type,
                'contextUrl': None,
                'documentUrl': response.url,
            }

            link_header = response.headers.get('link')
            if link_header:
                # Parse once; both link relations are read from the result.
                links = parse_link_header(link_header)

                linked_context = links.get(LINK_HEADER_REL)
                # only 1 related link header permitted
                if linked_context and media_type != 'application/ld+json':
                    if isinstance(linked_context, list):
                        raise JsonLdError(
                            'URL could not be dereferenced, '
                            'it has more than one '
                            'associated HTTP Link Header.',
                            'jsonld.LoadDocumentError',
                            {'url': url},
                            code='multiple context link headers')
                    doc['contextUrl'] = linked_context['target']

                # if not JSON-LD, alternate may point there
                linked_alternate = _find_jsonld_alternate(
                    links.get('alternate'))
                if (linked_alternate and
                        not re.match(
                            r'^application\/(\w*\+)?json$', media_type)):
                    doc['contentType'] = 'application/ld+json'
                    doc['documentUrl'] = iri_resolver.resolve(
                        linked_alternate['target'], url)
                    # Re-enter the loader so the alternate URL goes through
                    # the same validation (including secure mode).
                    return loader(doc['documentUrl'], options=options)

            doc['document'] = response.json()
            return doc
        except JsonLdError:
            # Already the right error type; re-raise without rewrapping.
            raise
        except Exception as cause:
            raise JsonLdError(
                'Could not retrieve a JSON-LD document from the URL.',
                'jsonld.LoadDocumentError',
                code='loading document failed') from cause

    return loader


def _media_type(content_type):
    """Return the lower-cased media type of a Content-Type value, without
    any ``;``-separated parameters."""
    return content_type.split(';', 1)[0].strip().lower()


def _find_jsonld_alternate(alternates):
    """Return the first ``rel="alternate"`` link whose type is
    ``application/ld+json``, or None.

    Accepts either a single parsed link (a dict) or a list of them, since
    a relation that appears more than once is delivered as a list.
    """
    if not alternates:
        return None
    if not isinstance(alternates, list):
        alternates = [alternates]
    return next(
        (link for link in alternates
         if link.get('type') == 'application/ld+json'),
        None)