Source code for api.util.url
from urllib.parse import urlparse, ParseResult, urlencode
class URLUtility(object):
    """Contains different helper methods simplifying URL construction."""

    @staticmethod
    def build_url(base_url, query_parameters):
        """Construct a URL with specified query parameters.

        The scheme, netloc, path, params and fragment of ``base_url`` are
        kept as-is. Any query string already present in ``base_url`` is
        replaced (not merged) by the URL-encoded ``query_parameters``.

        :param base_url: Base URL
        :type base_url: str

        :param query_parameters: Dictionary containing query parameters
        :type query_parameters: Dict

        :return: Constructed URL
        :rtype: str
        """
        parsed = urlparse(base_url)
        rebuilt = ParseResult(
            scheme=parsed.scheme,
            netloc=parsed.netloc,
            path=parsed.path,
            params=parsed.params,
            # The original query component is intentionally discarded here.
            query=urlencode(query_parameters),
            fragment=parsed.fragment,
        )

        return rebuilt.geturl()

    @staticmethod
    def url_match_in_domain_list(url, domain_list):
        """Check a candidate URL against a list of allowed URL patterns.

        Supports wildcard matching of subdomains under a given root.
        To be matched against, a value in ``domain_list`` must be of the form::

            <scheme>://[(<subdomain>|*).]<domain>.<tld>

        Examples of valid ``domain_list`` entries::

            http://librarysimplified.org
            https://librarysimplified.org
            https://www.librarysimplified.org
            https://*.librarysimplified.org
            https://alpha.bravo.charlie.librarysimplified.org
            https://*.charlie.librarysimplified.org
            capacitor://*.vercel.app

        The scheme of the candidate must equal the scheme of the entry, and
        the netloc must either equal the entry's netloc exactly or, for an
        entry starting with ``*.``, end with the remainder of the pattern
        (including the leading dot).

        Note that the entry ``http://*.librarysimplified.org`` WILL NOT match
        the URL of the root domain ``http://librarysimplified.org``. To match
        the root domain you must also include it as a separate, non-wildcard
        entry.

        :param url: Candidate URL (for example, a request origin)
        :type url: str

        :param domain_list: Iterable of allowed URL patterns
        :type domain_list: Iterable[str]

        :return: True if the candidate matches at least one entry, otherwise
            False. Non-string or unparseable candidates never match, and
            non-string or unparseable entries are skipped.
        :rtype: bool
        """
        if not isinstance(url, str):
            return False  # origin value was not a string

        try:
            url_parsed = urlparse(url)
        except ValueError:
            # urlparse rejects malformed input (e.g. unbalanced IPv6
            # brackets); a URL we cannot parse cannot be allowed.
            return False

        for allowed_domain in domain_list:
            if not isinstance(allowed_domain, str):
                # TODO: log a warning about a bad value in the setting
                continue

            try:
                allowed_parsed = urlparse(allowed_domain)
            except ValueError:
                # TODO: log a warning about a bad value in the setting
                continue

            # If the scheme doesn't match it won't be allowed, period
            if url_parsed.scheme != allowed_parsed.scheme:
                continue

            allowed_netloc = allowed_parsed.netloc

            # A complete match of the netloc strings
            if url_parsed.netloc == allowed_netloc:
                return True

            # Subdomain wildcard: drop the '*' but keep the dot, so that
            # '*.example.org' matches 'a.example.org' but not 'example.org'
            # or 'badexample.org'.
            if allowed_netloc.startswith("*.") and url_parsed.netloc.endswith(
                allowed_netloc[1:]
            ):
                return True

        return False