import json
import logging
import urllib.request
import urllib.parse
import urllib.error
from collections import Counter
from flask_babel import lazy_gettext as _
from core.config import (
CannotLoadConfiguration,
Configuration,
)
from core.coverage import (
CoverageFailure,
IdentifierCoverageProvider,
)
from core.metadata_layer import (
ContributorData,
IdentifierData,
LinkData,
MeasurementData,
Metadata,
SubjectData,
)
from core.model import (
DataSource,
ExternalIntegration,
Hyperlink,
Identifier,
Measurement,
Representation,
Session,
Subject,
get_one,
Equivalency,
LicensePool,
Collection,
Edition,
Contributor,
Contribution,
)
from core.util import TitleProcessor
from sqlalchemy.sql import (
select,
join,
and_,
or_,
)
from sqlalchemy.orm import aliased
from core.util.http import HTTP
[docs]class NoveListAPI(object):
PROTOCOL = ExternalIntegration.NOVELIST
NAME = _("Novelist API")
# Hardcoded authentication key used as a Header for calling the NoveList
# Collections API. It identifies the client, and lets NoveList know that
# SimplyE is making the requests.
AUTHORIZED_IDENTIFIER = "62521fa1-bdbb-4939-84aa-aee2a52c8d59"
SETTINGS = [
{"key": ExternalIntegration.USERNAME,
"label": _("Profile"), "required": True},
{"key": ExternalIntegration.PASSWORD,
"label": _("Password"), "required": True},
]
# Different libraries may have different NoveList integrations
# on the same circulation manager.
SITEWIDE = False
IS_CONFIGURED = None
_configuration_library_id = None
log = logging.getLogger("NoveList API")
version = "2.2"
NO_ISBN_EQUIVALENCY = "No clear ISBN equivalency: %r"
# While the NoveList API doesn't require parameters to be passed via URL,
# the Representation object needs a unique URL to return the proper data
# from the database.
QUERY_ENDPOINT = (
"https://novselect.ebscohost.com/Data/ContentByQuery?"
"ISBN=%(ISBN)s&ClientIdentifier=%(ClientIdentifier)s&version=%(version)s"
)
COLLECTION_DATA_API = "http://www.noveListcollectiondata.com/api/collections"
AUTH_PARAMS = "&profile=%(profile)s&password=%(password)s"
MAX_REPRESENTATION_AGE = 7*24*60*60 # one week
currentQueryIdentifier = None
medium_to_book_format_type_values = {
Edition.BOOK_MEDIUM: "EBook",
Edition.AUDIO_MEDIUM: "Audiobook",
}
[docs] @classmethod
def from_config(cls, library):
profile, password = cls.values(library)
if not (profile and password):
raise CannotLoadConfiguration(
"No NoveList integration configured for library (%s)." % library.short_name
)
_db = Session.object_session(library)
return cls(_db, profile, password)
[docs] @classmethod
def values(cls, library):
_db = Session.object_session(library)
integration = ExternalIntegration.lookup(
_db, ExternalIntegration.NOVELIST,
ExternalIntegration.METADATA_GOAL, library=library
)
if not integration:
return (None, None)
profile = integration.username
password = integration.password
return (profile, password)
def __init__(self, _db, profile, password):
self._db = _db
self.profile = profile
self.password = password
@property
def source(self):
return DataSource.lookup(self._db, DataSource.NOVELIST)
[docs] def lookup_equivalent_isbns(self, identifier):
"""Finds NoveList data for all ISBNs equivalent to an identifier.
:return: Metadata object or None
"""
lookup_metadata = []
license_sources = DataSource.license_sources_for(self._db, identifier)
# Find strong ISBN equivalents.
isbns = list()
for license_source in license_sources:
isbns += [eq.output for eq in identifier.equivalencies if (
eq.data_source == license_source and
eq.strength == 1 and
eq.output.type == Identifier.ISBN
)]
if not isbns:
self.log.warning(
("Identifiers without an ISBN equivalent can't"
"be looked up with NoveList: %r"), identifier
)
return None
# Look up metadata for all equivalent ISBNs.
lookup_metadata = list()
for isbn in isbns:
metadata = self.lookup(isbn)
if metadata:
lookup_metadata.append(metadata)
if not lookup_metadata:
self.log.warning(
("No NoveList metadata found for Identifiers without an ISBN"
"equivalent can't be looked up with NoveList: %r"), identifier
)
return None
best_metadata, confidence = self.choose_best_metadata(
lookup_metadata, identifier
)
if best_metadata:
if round(confidence, 2) < 0.5:
self.log.warning(self.NO_ISBN_EQUIVALENCY, identifier)
return None
return metadata
@classmethod
def _confirm_same_identifier(self, metadata_objects):
"""Ensures that all metadata objects have the same NoveList ID"""
novelist_ids = set([
metadata.primary_identifier.identifier
for metadata in metadata_objects
])
return len(novelist_ids) == 1
[docs] def lookup(self, identifier, **kwargs):
"""Requests NoveList metadata for a particular identifier
:param kwargs: Keyword arguments passed into Representation.post().
:return: Metadata object or None
"""
client_identifier = identifier.urn
if identifier.type != Identifier.ISBN:
return self.lookup_equivalent_isbns(identifier)
params = dict(
ClientIdentifier=client_identifier, ISBN=identifier.identifier,
version=self.version, profile=self.profile, password=self.password
)
scrubbed_url = str(self.scrubbed_url(params))
url = self.build_query_url(params)
self.log.debug("NoveList lookup: %s", url)
# We want to make an HTTP request for `url` but cache the
# result under `scrubbed_url`. Define a 'URL normalization'
# function that always returns `scrubbed_url`.
def normalized_url(original):
return scrubbed_url
representation, from_cache = Representation.post(
_db=self._db, url=str(url), data='',
max_age=self.MAX_REPRESENTATION_AGE,
response_reviewer=self.review_response,
url_normalizer=normalized_url, **kwargs
)
# Commit to the database immediately to reduce the chance
# that some other incoming request will try to create a
# duplicate Representation and crash.
self._db.commit()
return self.lookup_info_to_metadata(representation)
[docs] @classmethod
def review_response(cls, response):
"""Performs NoveList-specific error review of the request response"""
status_code, headers, content = response
if status_code == 403:
raise Exception("Invalid NoveList credentials")
if content.startswith(b'"Missing'):
raise Exception("Invalid NoveList parameters: %s" %
content.decode("utf-8"))
return response
[docs] @classmethod
def scrubbed_url(cls, params):
"""Removes authentication details from cached Representation.url"""
return cls.build_query_url(params, include_auth=False)
@classmethod
def _scrub_subtitle(cls, subtitle):
"""Removes common NoveList subtitle annoyances"""
if subtitle:
subtitle = subtitle.replace('[electronic resource]', '')
# Then get rid of any leading whitespace or punctuation.
subtitle = TitleProcessor.extract_subtitle('', subtitle)
return subtitle
[docs] @classmethod
def build_query_url(cls, params, include_auth=True):
"""Builds a unique and url-encoded query endpoint"""
url = cls.QUERY_ENDPOINT
if include_auth:
url += cls.AUTH_PARAMS
urlencoded_params = dict()
for name, value in list(params.items()):
urlencoded_params[name] = urllib.parse.quote(value)
return url % urlencoded_params
def _extract_isbns(self, book_info):
isbns = []
synonymous_ids = book_info.get('manifestations')
for synonymous_id in synonymous_ids:
isbn = synonymous_id.get('ISBN')
if isbn:
isbn_data = IdentifierData(Identifier.ISBN, isbn)
isbns.append(isbn_data)
return isbns
[docs] def get_recommendations(self, metadata, recommendations_info):
if not recommendations_info:
return metadata
related_books = recommendations_info.get('titles')
related_books = [b for b in related_books if b.get('is_held_locally')]
if related_books:
for book_info in related_books:
metadata.recommendations += self._extract_isbns(book_info)
return metadata
[docs] def get_items_from_query(self, library):
"""Gets identifiers and its related title, medium, and authors from the
database.
Keeps track of the current 'ISBN' identifier and current item object that
is being processed. If the next ISBN being processed is new, the existing one
gets added to the list of items. If the ISBN is the same, then we append
the Author property since there are multiple contributors.
:return: a list of Novelist objects to send
"""
collectionList = []
for c in library.collections:
collectionList.append(c.id)
LEFT_OUTER_JOIN = True
i1 = aliased(Identifier)
i2 = aliased(Identifier)
roles = list(Contributor.AUTHOR_ROLES)
roles.append(Contributor.NARRATOR_ROLE)
isbnQuery = select(
[i1.identifier, i1.type, i2.identifier,
Edition.title, Edition.medium, Edition.published,
Contribution.role, Contributor.sort_name,
DataSource.name],
).select_from(
join(LicensePool, i1, i1.id == LicensePool.identifier_id)
.join(Equivalency, i1.id == Equivalency.input_id, LEFT_OUTER_JOIN)
.join(i2, Equivalency.output_id == i2.id, LEFT_OUTER_JOIN)
.join(
Edition,
or_(Edition.primary_identifier_id == i1.id,
Edition.primary_identifier_id == i2.id)
)
.join(Contribution, Edition.id == Contribution.edition_id)
.join(Contributor, Contribution.contributor_id == Contributor.id)
.join(DataSource, DataSource.id == LicensePool.data_source_id)
).where(
and_(
LicensePool.collection_id.in_(collectionList),
or_(i1.type == "ISBN", i2.type == "ISBN"),
or_(Contribution.role.in_(roles))
)
).order_by(i1.identifier, i2.identifier)
result = self._db.execute(isbnQuery)
items = []
newItem = None
existingItem = None
currentIdentifier = None
# Loop through the query result. There's a need to keep track of the
# previously processed object and the currently processed object because
# the identifier could be the same. If it is, we update the data
# object to send to Novelist.
for item in result:
if newItem:
existingItem = newItem
(currentIdentifier, existingItem, newItem, addItem) = (
self.create_item_object(item, currentIdentifier, existingItem)
)
if addItem and existingItem:
# The Role property isn't needed in the actual request.
del existingItem['role']
items.append(existingItem)
# For the case when there's only one item in `result`
if newItem:
del newItem['role']
items.append(newItem)
return items
[docs] def create_item_object(self, object, currentIdentifier, existingItem):
"""Returns a new item if the current identifier that was processed
is not the same as the new object's ISBN being processed. If the new
object's ISBN matches the current identifier, the previous object's
Author property is updated.
:param object: the current item object to process
:param currentIdentifier: the current identifier to process
:param existingItem: the previously processed item object
:return: (
current identifier,
the existing object if available,
a new object if the item wasn't found before,
if the item is ready to the added to the list of books to send
)
"""
if not object:
return (None, None, None, False)
if (object[1] == Identifier.ISBN):
isbn = object[0]
elif object[2] is not None:
isbn = object[2]
else:
# We cannot find an ISBN for this work -- probably due to
# a data error.
return (None, None, None, False)
roles = list(Contributor.AUTHOR_ROLES)
roles.append(Contributor.NARRATOR_ROLE)
role = object[6]
author_or_narrator = object[7] if role in roles else ""
distributor = object[8]
# If there's no existing author value but we now get one, add it.
# If the role is narrator and it's a new value
# (i.e. no "narrator" was already added), then add the narrator.
# If we encounter an existing ISBN and its role is "Primary Author",
# then that value overrides the existing Author property.
if isbn == currentIdentifier and existingItem:
if not existingItem.get('author') and role in Contributor.AUTHOR_ROLES:
existingItem['author'] = author_or_narrator
if not existingItem.get('narrator') and role == Contributor.NARRATOR_ROLE:
existingItem['narrator'] = author_or_narrator
if role == Contributor.PRIMARY_AUTHOR_ROLE:
existingItem['author'] = author_or_narrator
existingItem['role'] = role
# Always return False to keep processing the currentIdentifier until
# we get a new ISBN to process. In that case, return and add all
# the data we've accumulated for this object.
return (currentIdentifier, existingItem, None, False)
else:
# If we encounter a new ISBN, we take whatever values are initially given.
title = object[3]
mediaType = self.medium_to_book_format_type_values.get(
object[4], "")
newItem = dict(
isbn=isbn,
title=title,
mediaType=mediaType,
role=role,
distributor=distributor
)
publicationDate = object[5]
if publicationDate:
publicationDateString = publicationDate.isoformat().replace("-", "")
newItem["publicationDate"] = publicationDateString
# If we are processing a new item and there is an existing item,
# then we can add the existing item to the list and keep
# the current new item for further data aggregation.
addItem = True if existingItem else False
if role in Contributor.AUTHOR_ROLES:
newItem['author'] = author_or_narrator
if role == Contributor.NARRATOR_ROLE:
newItem['narrator'] = author_or_narrator
return (isbn, existingItem, newItem, addItem)
[docs] def put_items_novelist(self, library):
items = self.get_items_from_query(library)
content = None
if items:
data = json.dumps(self.make_novelist_data_object(items))
response = self.put(
self.COLLECTION_DATA_API,
{
"AuthorizedIdentifier": self.AUTHORIZED_IDENTIFIER,
"Content-Type": "application/json; charset=utf-8"
},
data=data
)
if (response.status_code == 200):
content = json.loads(response.content)
logging.info(
"Success from NoveList: %r", response.content
)
else:
logging.error("Data sent was: %r", data)
logging.error(
"Error %s from NoveList: %r", response.status_code,
response.content
)
return content
[docs] def make_novelist_data_object(self, items):
return {
"customer": "%s:%s" % (self.profile, self.password),
"records": items,
}
[docs] def put(self, url, headers, **kwargs):
data = kwargs.get('data')
if 'data' in kwargs:
del kwargs['data']
# This might take a very long time -- disable the normal
# timeout.
kwargs['timeout'] = None
response = HTTP.put_with_timeout(
url, data, headers=headers, **kwargs
)
return response
[docs]class MockNoveListAPI(NoveListAPI):
def __init__(self, _db, *args, **kwargs):
self._db = _db
self.responses = []
[docs] def setup_method(self, *args):
self.responses = self.responses + list(args)
[docs] def lookup(self, identifier):
if not self.responses:
return []
response = self.responses[0]
self.responses = self.responses[1:]
return response