import logging
from contextlib import contextmanager
import requests
from flask_babel import lazy_gettext as _
from requests import HTTPError, Request
from core.exceptions import BaseError
from core.model import DeliveryMechanism
from core.model.configuration import (
ConfigurationAttributeType,
ConfigurationFactory,
ConfigurationGrouping,
ConfigurationMetadata,
ConfigurationStorage,
)
from core.util import is_session
class ProQuestAPIClientConfiguration(ConfigurationGrouping):
    """Contains configuration settings of ProQuest API client.

    Every setting is declared as a ``ConfigurationMetadata`` class attribute
    carrying its storage key, a translatable label and description, its type,
    and whether it is required.
    """

    # Value used for ``page_size`` when the setting is not configured.
    DEFAULT_PAGE_SIZE = 5000

    # Endpoint the client queries to download pages of the OPDS 2.0 feed.
    books_catalog_service_url = ConfigurationMetadata(
        key="books_catalog_service_url",
        label=_("BooksCatalog service's URL"),
        description=_("URL of the BooksCatalog service endpoint"),
        type=ConfigurationAttributeType.TEXT,
        required=True,
    )

    # Number of publications requested per feed page
    # when the client downloads all of the feed pages.
    page_size = ConfigurationMetadata(
        key="page_size",
        label=_("Feed page's size"),
        description=_(
            "This value determines how many publications "
            "will be on a single page fetched from the BooksCatalog service."
        ),
        type=ConfigurationAttributeType.NUMBER,
        required=False,
        default=DEFAULT_PAGE_SIZE,
    )

    # Endpoint the client queries to create JWT bearer tokens.
    partner_auth_token_service_url = ConfigurationMetadata(
        key="partner_auth_token_service_url",
        label=_("PartnerAuthToken service's URL"),
        description=_("URL of the PartnerAuthToken service endpoint."),
        type=ConfigurationAttributeType.TEXT,
        required=True,
    )

    # Endpoint the client queries to resolve a Doc ID into a download link.
    download_link_service_url = ConfigurationMetadata(
        key="download_link_service_url",
        label=_("DownloadLink service's URL"),
        description=_("URL of the DownloadLink service endpoint."),
        type=ConfigurationAttributeType.TEXT,
        required=True,
    )

    # Optional proxy; when set, the client passes it to requests
    # under the "http" key of the proxies dictionary.
    http_proxy_url = ConfigurationMetadata(
        key="http_proxy_url",
        label=_("HTTP proxy's URL"),
        description=_("URL of the proxy handling HTTP traffic."),
        type=ConfigurationAttributeType.TEXT,
        required=False,
    )

    # Optional proxy; when set, the client passes it to requests
    # under the "https" key of the proxies dictionary.
    https_proxy_url = ConfigurationMetadata(
        key="https_proxy_url",
        label=_("HTTPS proxy's URL"),
        description=_("URL of the proxy handling HTTPS traffic."),
        type=ConfigurationAttributeType.TEXT,
        required=False,
    )
class ProQuestAPIInvalidJSONResponseError(BaseError):
    """Raised when the client receives from ProQuest API a response without a valid JSON document."""

    def __init__(self, response):
        """Initialize a new instance of ProQuestAPIInvalidJSONResponseError class.

        The error message is fixed; the offending response is kept
        so that callers can inspect it through the `response` property.

        :param response: Response object
        :type response: requests.models.Response
        """
        super(ProQuestAPIInvalidJSONResponseError, self).__init__(
            "Response body does not contain a valid JSON document"
        )

        self._response = response

    @property
    def response(self):
        """Return the response associated with this error.

        :return: Response associated with this error
        :rtype: requests.models.Response
        """
        return self._response
class ProQuestAPIMissingJSONPropertyError(ProQuestAPIInvalidJSONResponseError):
    """Raised when a JSON document received from ProQuest API lacks a required property."""

    def __init__(self, response, missing_property):
        """Initialize a new instance of ProQuestAPIMissingJSONPropertyError class.

        :param response: Response object
        :type response: requests.models.Response

        :param missing_property: Name of the missing property
        :type missing_property: str
        """
        # The lookup deliberately starts *after* ProQuestAPIInvalidJSONResponseError:
        # that class' initializer accepts only the response and hard-codes its own message,
        # so it cannot carry the property-specific message built here.
        # NOTE(review): the response is still forwarded as the second positional argument
        # of the grandparent initializer, exactly as before - confirm that is what it expects.
        super(ProQuestAPIInvalidJSONResponseError, self).__init__(
            "JSON document does not contain required property '{0}'".format(
                missing_property
            ),
            response,
        )

        # Because the parent's initializer is skipped, the attribute backing
        # the inherited `response` property has to be set here;
        # otherwise reading `error.response` raises AttributeError.
        self._response = response
        self._missing_property = missing_property

    @property
    def missing_property(self):
        """Return the name of the missing property.

        :return: Name of the missing property
        :rtype: str
        """
        return self._missing_property
class ProQuestBook(object):
    """POCO class containing information about a ProQuest book.

    An instance carries either a link to the book or the book's content, never both.
    Instances expose their state through read-only properties only,
    and compare and hash by value.
    """

    def __init__(self, link=None, content=None, content_type=None):
        """Initialize a new instance of ProQuestBook class.

        :param link: Book's link
        :type link: Optional[str]

        :param content: Book's content
        :type content: Optional[bytes]

        :param content_type: Content type
        :type content_type: Optional[str]

        :raises ValueError: If an argument has a wrong type
            or both `link` and `content` are set
        """
        if link is not None and not isinstance(link, str):
            raise ValueError("Argument 'link' must be a string")
        if content is not None and not isinstance(content, bytes):
            raise ValueError("Argument 'content' must be a bytes string")
        if content_type is not None and not isinstance(content_type, str):
            raise ValueError("Argument 'content_type' must be a string")
        if link is not None and content is not None:
            raise ValueError(
                "'link' and 'content' cannot be both set up at the same time"
            )

        self._link = link
        self._content = content
        self._content_type = content_type

    def __eq__(self, other):
        """Compare self and other other book.

        :param other: Other book instance
        :type other: Any

        :return: Boolean value indicating whether self and other are equal to each to other
        :rtype: bool
        """
        if not isinstance(other, ProQuestBook):
            return False

        return (
            self.link == other.link
            and self.content == other.content
            and self.content_type == other.content_type
        )

    def __hash__(self):
        """Return a hash consistent with `__eq__`.

        Defining `__eq__` alone makes instances unhashable on Python 3,
        so the hash is derived from the same three fields that `__eq__` compares.

        :return: Hash of the book
        :rtype: int
        """
        return hash((self._link, self._content, self._content_type))

    @property
    def link(self):
        """Return the book's link.

        :return: Book's link
        :rtype: Optional[str]
        """
        return self._link

    @property
    def content(self):
        """Return the book's content.

        :return: Book's content
        :rtype: Optional[bytes]
        """
        return self._content

    @property
    def content_type(self):
        """Return the content type.

        :return: Content type
        :rtype: Optional[str]
        """
        return self._content_type
class ProQuestAPIClient(object):
    """ProQuest API client.

    The client talks to three services whose URLs come from ProQuestAPIClientConfiguration:
    BooksCatalog (paginated OPDS 2.0 feed), PartnerAuthToken (JWT bearer tokens)
    and DownloadLink (book links and content).

    Bearer tokens are credentials, so they are never written to the log.
    """

    # Largest page index and page size accepted by `download_feed_page` (both inclusive).
    MAX_PAGE_INDEX = 32766
    MAX_PAGE_SIZE = 32766

    # Names of the properties looked up in JSON documents returned by the API.
    RESPONSE_STATUS_CODE_FIELD = "statusCode"
    RESPONSE_OPDS_FEED_FIELD = "opdsFeed"
    TOKEN_FIELD = "token"
    DOWNLOAD_LINK_FIELD = "downloadLink"

    # Download links containing this keyword are treated as links to DRM-free books.
    DRM_FREE_DOWNLOAD_LINK_KEYWORD = "getDrmFreeFile"

    # Status code denoting success, both as the HTTP status
    # and as the value of the "statusCode" property of a JSON document.
    SUCCESS_STATUS_CODE = 200

    def __init__(self, configuration_storage, configuration_factory):
        """Initialize a new instance of ProQuestAPIClient class.

        :param configuration_storage: ConfigurationStorage object
        :type configuration_storage: core.model.configuration.ConfigurationStorage

        :param configuration_factory: Factory creating ProQuestAPIClientConfiguration instance
        :type configuration_factory: core.model.configuration.ConfigurationFactory
        """
        self._configuration_storage = configuration_storage
        self._configuration_factory = configuration_factory

        self._logger = logging.getLogger(__name__)

    @contextmanager
    def _get_configuration(self, db):
        """Return the configuration object.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :return: Configuration object
        :rtype: ProQuestAPIClientConfiguration
        """
        with self._configuration_factory.create(
            self._configuration_storage, db, ProQuestAPIClientConfiguration
        ) as configuration:
            yield configuration

    def _get_request_headers(self, token):
        """Build the headers of an outgoing request.

        :param token: Optional JWT token to be put in the Authorization header
        :type token: Optional[str]

        :return: Dictionary containing the request headers
        :rtype: Dict[str, str]
        """
        headers = {"Content-Type": "application/json"}

        if token:
            headers["Authorization"] = "Bearer {0}".format(token)

        # The bearer token is a credential: log a copy of the headers with the token masked.
        loggable_headers = dict(headers)
        if "Authorization" in loggable_headers:
            loggable_headers["Authorization"] = "Bearer <redacted>"
        self._logger.debug("Headers: %s", loggable_headers)

        return headers

    def _get_request_proxies(self, configuration):
        """Build the proxies dictionary passed to requests.

        :param configuration: Configuration object
        :type configuration: ProQuestAPIClientConfiguration

        :return: Dictionary containing only the proxies that are configured
        :rtype: Dict[str, str]
        """
        proxies = {}

        if configuration.http_proxy_url:
            proxies["http"] = configuration.http_proxy_url
        if configuration.https_proxy_url:
            proxies["https"] = configuration.https_proxy_url

        self._logger.debug("Proxies: %s", proxies)

        return proxies

    def _create_request(self, method, url, query_parameters, token=None):
        """Create a new HTTP request.

        :param method: HTTP method
        :type method: str

        :param url: Target URL
        :type url: str

        :param query_parameters: Dictionary containing query parameters
        :type query_parameters: Dict

        :param token: Optional JWT token to be put in the Authorization header
        :type token: Optional[str]

        :return: Request object (not prepared and not sent yet)
        :rtype: requests.models.Request
        """
        self._logger.debug("Started creating a new request")

        headers = self._get_request_headers(token)
        request = Request(method, url, params=query_parameters, headers=headers)

        self._logger.debug(
            "Finished creating a new request: %s (%s)", request, request.url
        )

        return request

    @staticmethod
    def _try_to_extract_json_from_response(response):
        """Try to extract a JSON document from the response.

        NOTE: DownloadLink service doesn't always return a JSON document.
        For open-access books it returns the book content.

        :param response: Response object
        :type response: requests.models.Response

        :return: JSON document containing in the response (if any)
        :rtype: Optional[Dict]
        """
        try:
            return response.json()
        except ValueError:
            return None

    def _parse_response(self, response, must_be_json=False):
        """Parse the response and return a JSON document containing in it.

        :param response: Response object
        :type response: requests.models.Response

        :param must_be_json: Boolean value specifying whether the response must contain a valid JSON document
        :type must_be_json: bool

        :return: 2-tuple containing the response and the JSON document containing in it (if any)
        :rtype: Tuple[requests.models.Response, Optional[Dict]]

        :raises requests.HTTPError: If the HTTP status denotes an error
            or the "statusCode" property of the JSON document is not a success code
        :raises ProQuestAPIInvalidJSONResponseError: If the response has no (or an empty)
            JSON document although `must_be_json` is set
        :raises ProQuestAPIMissingJSONPropertyError: If the JSON document is not an object
            containing the "statusCode" property
        """
        response_json = self._try_to_extract_json_from_response(response)

        # Log the error details the API put into the body before raising for the HTTP status.
        if response.status_code != self.SUCCESS_STATUS_CODE and response_json:
            self._logger.error("Request failed: %s", response_json)

        response.raise_for_status()

        if not response_json:
            if must_be_json:
                raise ProQuestAPIInvalidJSONResponseError(response)

            return response, None

        # A valid JSON document is not necessarily an object: guard against numbers,
        # strings and arrays, for which a bare `in` check either raises TypeError
        # or silently performs a substring/element test.
        if (
            not isinstance(response_json, dict)
            or self.RESPONSE_STATUS_CODE_FIELD not in response_json
        ):
            raise ProQuestAPIMissingJSONPropertyError(
                response, self.RESPONSE_STATUS_CODE_FIELD
            )

        status_code = response_json[self.RESPONSE_STATUS_CODE_FIELD]

        # The API may report a failure in the body of an otherwise successful HTTP response.
        if status_code != self.SUCCESS_STATUS_CODE:
            raise HTTPError(
                "Request failed with {0} code".format(status_code), response=response
            )

        return response, response_json

    def _send_request(
        self,
        configuration,
        method,
        url,
        query_parameters,
        token=None,
        response_must_be_json=False,
    ):
        """Send an HTTP requests, check the result code and return the response.

        :param configuration: Configuration object
        :type configuration: ProQuestAPIClientConfiguration

        :param method: HTTP method
        :type method: str

        :param url: Target URL
        :type url: str

        :param query_parameters: Dictionary containing query parameters
        :type query_parameters: Dict

        :param token: Optional JWT token to be put in the Authorization header
        :type token: Optional[str]

        :param response_must_be_json: Boolean value specifying whether the response must contain a valid JSON document
        :type response_must_be_json: bool

        :return: 2-tuple containing the response and the JSON document containing in it (if any)
        :rtype: Tuple[requests.models.Response, Optional[Dict]]
        """
        self._logger.debug(
            "Started sending %s HTTP request to %s with the following parameters: %s",
            method,
            url,
            query_parameters,
        )

        request = self._create_request(method, url, query_parameters, token)
        proxies = self._get_request_proxies(configuration)

        with requests.sessions.Session() as session:
            prepared_request = session.prepare_request(request)
            response = session.send(prepared_request, proxies=proxies)

        self._logger.debug("Received the following response: %s", response)

        response, response_json = self._parse_response(response, response_must_be_json)

        self._logger.debug(
            "Finished sending %s HTTP request to %s with the following parameters: %s",
            method,
            url,
            query_parameters,
        )

        return response, response_json

    def _download_feed_page(self, configuration, page, hits_per_page):
        """Download a single page of a paginated OPDS 2.0 feed.

        Unlike `download_feed_page`, this method does not validate its arguments.

        :param configuration: Configuration object
        :type configuration: ProQuestAPIClientConfiguration

        :param page: Page index (max = 32,766)
        :type page: int

        :param hits_per_page: Number of publications on a single page (max = 32,766)
        :type hits_per_page: int

        :return: Python dictionary object containing the feed's page
        :rtype: dict

        :raises ProQuestAPIMissingJSONPropertyError: If the JSON document has no "opdsFeed" property
        """
        self._logger.info(
            "Started downloading page # %s (%s hits) of a paginated OPDS 2.0 feed from %s",
            page,
            hits_per_page,
            configuration.books_catalog_service_url,
        )

        parameters = {"page": page, "hitsPerPage": hits_per_page}
        response, response_json = self._send_request(
            configuration,
            "get",
            configuration.books_catalog_service_url,
            parameters,
            response_must_be_json=True,
        )

        self._logger.info(
            "Finished downloading page # %s (%s hits) of a paginated OPDS 2.0 feed from %s",
            page,
            hits_per_page,
            configuration.books_catalog_service_url,
        )

        if self.RESPONSE_OPDS_FEED_FIELD not in response_json:
            raise ProQuestAPIMissingJSONPropertyError(
                response, self.RESPONSE_OPDS_FEED_FIELD
            )

        return response_json[self.RESPONSE_OPDS_FEED_FIELD]

    def download_feed_page(self, db, page, hits_per_page):
        """Download a single page of a paginated OPDS 2.0 feed.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param page: Page index (max = 32,766)
        :type page: int

        :param hits_per_page: Number of publications on a single page (max = 32,766)
        :type hits_per_page: int

        :return: Python dictionary object containing the feed's page
        :rtype: dict

        :raises ValueError: If an argument has a wrong type or is out of range
        """
        if not is_session(db):
            raise ValueError('"db" argument must be a valid SQLAlchemy session')
        if not isinstance(page, int):
            raise ValueError('"page" argument must be an integer')
        # Both bounds are inclusive: MAX_PAGE_INDEX itself is accepted.
        if page < 0 or page > self.MAX_PAGE_INDEX:
            raise ValueError(
                "Page argument must be a non-negative number not greater than {0}".format(
                    self.MAX_PAGE_INDEX
                )
            )
        if not isinstance(hits_per_page, int):
            raise ValueError('"hits_per_page" argument must be an integer')
        # Both bounds are inclusive: MAX_PAGE_SIZE itself is accepted.
        if hits_per_page < 0 or hits_per_page > self.MAX_PAGE_SIZE:
            raise ValueError(
                "Hits per page argument must be a non-negative number not greater than {0}".format(
                    self.MAX_PAGE_SIZE
                )
            )

        self._logger.info(
            "Started downloading page # %s (%s hits) of a paginated OPDS 2.0 feed ",
            page,
            hits_per_page,
        )

        with self._get_configuration(db) as configuration:
            feed = self._download_feed_page(configuration, page, hits_per_page)

            self._logger.info(
                "Finished downloading page # %s (%s hits) of a paginated OPDS 2.0 feed",
                page,
                hits_per_page,
            )

            return feed

    def download_all_feed_pages(self, db):
        """Download all available feed pages.

        Pages are requested one by one, starting from page 1 and using the configured page size,
        until a request fails with an HTTP error or returns an invalid JSON document;
        either failure is treated as the end of the feed.

        NOTE: This is a generator, so nothing (including the validation of `db`)
        happens until the result is iterated.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :return: Iterable list of feed pages in a form of Python dictionaries
        :rtype: Iterable[dict]

        :raises ValueError: If `db` is not a valid SQLAlchemy session
        """
        if not is_session(db):
            raise ValueError('"db" argument must be a valid SQLAlchemy session')

        self._logger.info(
            "Started downloading all of the pages of a paginated OPDS 2.0 feed"
        )

        with self._get_configuration(db) as configuration:
            page = 1

            while True:
                # Only the download is guarded: errors raised by the consumer
                # while it processes a yielded page must not be mistaken for the end of the feed.
                try:
                    feed = self._download_feed_page(
                        configuration, page, configuration.page_size
                    )
                except HTTPError as error:
                    self._logger.debug(
                        "Got an HTTP error %s, assuming we reached the end of the feed",
                        error,
                    )
                    break
                except ProQuestAPIInvalidJSONResponseError:
                    self._logger.exception(
                        "Got unexpected ProQuestAPIInvalidJSONResponseError, "
                        "assuming we reached the end of the feed"
                    )
                    break

                page += 1

                yield feed

        self._logger.info(
            "Finished downloading all of the pages of a paginated OPDS 2.0 feed"
        )

    def create_token(self, db, affiliation_id):
        """Create a new JWT bearer token.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param affiliation_id: SAML affiliation ID used as a patron's unique identifier by ProQuest
        :type affiliation_id: str

        :return: New JWT bearer token
        :rtype: str

        :raises ValueError: If an argument is invalid
        :raises ProQuestAPIMissingJSONPropertyError: If the JSON document has no "token" property
        """
        if not is_session(db):
            raise ValueError('"db" argument must be a valid SQLAlchemy session')
        if not affiliation_id or not isinstance(affiliation_id, str):
            raise ValueError('"affiliation_id" argument must be a non-empty string')

        self._logger.info(
            "Started creating a new JWT bearer token for affiliation ID %s",
            affiliation_id,
        )

        with self._get_configuration(db) as configuration:
            parameters = {"userName": affiliation_id}
            response, response_json = self._send_request(
                configuration,
                "get",
                configuration.partner_auth_token_service_url,
                parameters,
                response_must_be_json=True,
            )

            # The JSON document contains the freshly issued token,
            # so it is deliberately left out of the log message.
            self._logger.info(
                "Finished creating a new JWT bearer token for affiliation ID %s",
                affiliation_id,
            )

            if self.TOKEN_FIELD not in response_json:
                raise ProQuestAPIMissingJSONPropertyError(response, self.TOKEN_FIELD)

            return response_json[self.TOKEN_FIELD]

    def get_book(self, db, token, document_id):
        """Get a book by it's ProQuest Doc ID.

        NOTE: There are two different cases to consider:
        - Open-access books: in this case ProQuest API returns the book content.
        - Adobe DRM copy protected books: in this case ProQuest API returns an ACSM file containing
          information about downloading a digital publication.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param token: JWT bearer token created using `ProQuestAPIClient.create_token` method
        :type token: str

        :param document_id: ProQuest Doc ID
        :type document_id: str

        :return: Book instance containing either an ACS link to the book or the book content
        :rtype: ProQuestBook

        :raises ValueError: If an argument is invalid
        :raises ProQuestAPIMissingJSONPropertyError: If the JSON document has no "downloadLink" property
        """
        if not is_session(db):
            raise ValueError('"db" argument must be a valid SQLAlchemy session')
        if not token or not isinstance(token, str):
            raise ValueError('"token" argument must be a non-empty string')
        if not document_id or not isinstance(document_id, str):
            raise ValueError('"document_id" must be a non-empty string')

        # NOTE: The JWT token is a credential and is intentionally kept out of all log messages.
        self._logger.info("Started fetching a book link for Doc ID %s", document_id)

        with self._get_configuration(db) as configuration:
            parameters = {"docID": document_id}
            response, response_json = self._send_request(
                configuration,
                "get",
                configuration.download_link_service_url,
                parameters,
                token,
            )

            # No JSON document: the service returned the content of an open-access book.
            if not response_json:
                self._logger.info(
                    "Finished fetching an open-access book for Doc ID %s", document_id
                )

                return ProQuestBook(content=bytes(response.content))

            self._logger.info(
                "Finished fetching a download link for Doc ID %s: %s",
                document_id,
                response_json,
            )

            if self.DOWNLOAD_LINK_FIELD not in response_json:
                raise ProQuestAPIMissingJSONPropertyError(
                    response, self.DOWNLOAD_LINK_FIELD
                )

            # The API returns another link leading to either a DRM-free book or ACSM file:
            # - DRM-free books are publicly accessible, meaning that their download links
            #   are not protected by IP whitelisting and we shall pass the link to the client
            #   to avoid proxying the content through Circulation Manager.
            # - DRM-protected download links are protected by IP whitelisting
            #   and can be called only from Circulation Manager,
            #   meaning that Circulation Manager has to download an ACSM file
            #   and proxy it to the client.
            #   However, it shouldn't incur any bad consequences because
            #   ACSM files are usually relatively small.
            link = response_json[self.DOWNLOAD_LINK_FIELD]

            # In the case of DRM-free books we return a link immediately
            # and we'll pass it to the client app.
            if self.DRM_FREE_DOWNLOAD_LINK_KEYWORD in link:
                return ProQuestBook(link=link)

            # In the case of Adobe DRM-protected books we have to download an ACSM file
            # and pass its content to the client app.
            # The second element of the result is not needed; it is not bound to `_`
            # to avoid shadowing the module-level gettext alias.
            acsm_response = self._send_request(
                configuration, "get", link, {}, token, response_must_be_json=False
            )[0]

            self._logger.info(
                "Finished fetching an ACSM file for Doc ID %s", document_id
            )

            return ProQuestBook(
                content=bytes(acsm_response.content),
                content_type=DeliveryMechanism.ADOBE_DRM,
            )
class ProQuestAPIClientFactory(object):
    """Factory used for creating ProQuestAPIClient instances."""

    def create(self, integration_association):
        """Create a new instance of ProQuestAPIClient.

        The client is wired with a ConfigurationStorage built around
        the integration association and a fresh ConfigurationFactory.

        :param integration_association: Association with an external integration
        :type integration_association: core.model.configuration.HasExternalIntegration

        :return: New instance of ProQuestAPIClient
        :rtype: ProQuestAPIClient
        """
        configuration_storage = ConfigurationStorage(integration_association)
        configuration_factory = ConfigurationFactory()

        return ProQuestAPIClient(configuration_storage, configuration_factory)