Source code for api.util.url

from urllib.parse import urlparse, ParseResult, urlencode


class URLUtility(object):
    """Contains different helper methods simplifying URL construction."""

    @staticmethod
    def build_url(base_url, query_parameters):
        """Construct a URL with specified query parameters.

        The scheme, network location, path, params and fragment of ``base_url``
        are kept as-is. Any query string already present in ``base_url`` is
        replaced (not merged) by the URL-encoded ``query_parameters``.

        :param base_url: Base URL
        :type base_url: str

        :param query_parameters: Dictionary containing query parameters
        :type query_parameters: Dict

        :return: Constructed URL
        :rtype: str
        """
        parsed = urlparse(base_url)
        rebuilt = ParseResult(
            scheme=parsed.scheme,
            netloc=parsed.netloc,
            path=parsed.path,
            params=parsed.params,
            query=urlencode(query_parameters),
            fragment=parsed.fragment,
        )

        return rebuilt.geturl()

    @staticmethod
    def url_match_in_domain_list(url, domain_list):
        """
        Attempts to match a candidate URL against a list of URL patterns, with wildcard
        matching of subdomains under a given root.

        To be matched against, a value in 'domain_list' must be of the form:

            `(http|https)://[(<subdomain>|*).]<domain>.<tld>`

        Examples of valid domain_list entries:

            ```
            http://librarysimplified.org
            https://librarysimplified.org
            https://www.librarysimplified.org
            https://*.librarysimplified.org
            https://alpha.bravo.charlie.librarysimplified.org
            https://*.charlie.librarysimplified.org
            capacitor://*.vercel.app
            ```

        Note that the entry `http://*.librarysimplified.org` WILL NOT match
        the URL of the root domain `http://librarysimplified.org`. To match the root
        domain you must also include it as a separate, non-wildcard entry.

        Only the scheme and the network location (netloc) take part in the
        comparison; path, query and fragment are ignored. The netloc is compared
        verbatim, so a port in the candidate URL must also appear in the entry.

        :param url: Candidate URL
        :type url: str

        :param domain_list: Iterable of allowed URL patterns; entries that are
            not strings or cannot be parsed are skipped
        :type domain_list: Iterable[str]

        :return: True if the candidate matches at least one entry. False
            otherwise, including when the candidate is not a string, cannot be
            parsed, or lacks a scheme or a network location
        :rtype: bool
        """
        url_parsed = URLUtility._parse_or_none(url)

        # A candidate without both a scheme and a netloc can never be a valid
        # match. Rejecting it up front also stops a scheme-less candidate from
        # "matching" a malformed scheme-less entry (both parse to '' / '').
        if url_parsed is None or not (url_parsed.scheme and url_parsed.netloc):
            return False

        for allowed_domain in domain_list or ():
            allowed_parsed = URLUtility._parse_or_none(allowed_domain)
            if allowed_parsed is None:
                # TODO: log a warning about a bad value in the setting
                continue

            # If the scheme doesn't match it won't be allowed, period
            if url_parsed.scheme != allowed_parsed.scheme:
                continue

            allowed_netloc = allowed_parsed.netloc

            # Complete match of the netloc strings
            if url_parsed.netloc == allowed_netloc:
                return True

            # Subdomain wildcard: drop the leading '*' and keep the '.', so that
            # '*.example.org' matches 'a.example.org' but neither 'example.org'
            # nor 'evilexample.org'
            if (
                allowed_netloc.startswith('*.')
                and url_parsed.netloc.endswith(allowed_netloc[1:])
            ):
                return True

        return False

    @staticmethod
    def _parse_or_none(value):
        """Parse a URL string, returning None instead of raising.

        :param value: Value to parse
        :type value: object

        :return: Parsed URL, or None if ``value`` is not a string or
            ``urlparse`` rejects it
        :rtype: Optional[ParseResult]
        """
        # urlparse() silently accepts None/bytes and returns a bytes-based
        # result, so the type has to be checked explicitly.
        if not isinstance(value, str):
            return None

        try:
            return urlparse(value)
        except ValueError:
            # Raised for malformed input, e.g. unbalanced IPv6 brackets
            return None
Source code for api.util.url

Library Simplified Circulation Manager

Navigation

Related Topics