# encoding: utf-8
# CachedFeed, WillNotGenerateExpensiveFeed, CachedMARCFile
from . import (
Base,
flush,
get_one,
get_one_or_create,
)
from collections import namedtuple
import datetime
import logging
from sqlalchemy import (
Column,
DateTime,
ForeignKey,
Index,
Integer,
Unicode,
)
from sqlalchemy.sql.expression import (
and_,
)
from ..util.flask_util import OPDSFeedResponse
from ..util.datetime_helpers import utc_now
class CachedFeed(Base):
    """A feed whose generated content is stored in the ``cachedfeeds`` table."""

    __tablename__ = 'cachedfeeds'

    id = Column(Integer, primary_key=True)

    # The lane this feed belongs to. A null value means the feed was
    # generated for a WorkList instead; if work_id is null as well,
    # it is a feed for the top level.
    lane_id = Column(
        Integer,
        ForeignKey('lanes.id'),
        index=True,
        nullable=True,
    )

    # When this feed was created.
    timestamp = Column(
        DateTime(timezone=True),
        index=True,
        nullable=True,
    )

    # The kind of feed, e.g. 'page' or 'groups' (see the *_TYPE
    # constants below).
    type = Column(Unicode, nullable=False)

    # A unique key, available to feeds associated with a WorkList.
    # Should be null when the feed is associated with a Lane.
    unique_key = Column(Unicode, nullable=True)

    # The facet group values a 'page' feed was generated with.
    facets = Column(Unicode, nullable=True)

    # The pagination values a 'page' feed was generated with.
    pagination = Column(Unicode, nullable=False)

    # The feed document itself.
    content = Column(Unicode, nullable=True)

    # The Library this feed belongs to.
    library_id = Column(
        Integer,
        ForeignKey('libraries.id'),
        index=True,
    )

    # The Work this feed is about, if any.
    work_id = Column(
        Integer,
        ForeignKey('works.id'),
        index=True,
        nullable=True,
    )

    # The distinct kinds of feed that may be cached.
    GROUPS_TYPE = 'groups'
    PAGE_TYPE = 'page'
    NAVIGATION_TYPE = 'navigation'
    CRAWLABLE_TYPE = 'crawlable'
    RELATED_TYPE = 'related'
    RECOMMENDATIONS_TYPE = 'recommendations'
    SERIES_TYPE = 'series'
    CONTRIBUTOR_TYPE = 'contributor'

    # Sentinel values that stand in for a cache duration.
    CACHE_FOREVER = object()
    IGNORE_CACHE = object()

    log = logging.getLogger("CachedFeed")
@classmethod
def fetch(cls, _db, worklist, facets, pagination, refresher_method,
          max_age=None, raw=False, **response_kwargs):
    """Return an up-to-date feed, reusing the database cache when possible.

    If no usable cached copy exists, the feed is generated with
    `refresher_method` and (unless the cache is being ignored) stored
    in the database.

    :param _db: A database connection.
    :param worklist: The WorkList this feed is for.
    :param facets: A Facets object that sets this feed apart from
        others (for instance, by its sort order).
    :param pagination: A Pagination object identifying which page of
        a larger feed is wanted.
    :param refresher_method: A zero-argument callable invoked when the
        feed has to be regenerated. Its return value is converted
        with `str()`.
    :param max_age: Cached feeds older than this are treated as stale
        and regenerated. May be a number of seconds or a timedelta.
        When omitted, a default is worked out from the WorkList and
        Facets configuration. An integer value of zero or less skips
        the cache lookup and forces a refresh.
    :param raw: When False (the default) a Response-type object is
        returned. When True, the CachedFeed object itself is returned
        if there is one.
    :param response_kwargs: Extra keyword arguments passed through to
        OPDSFeedResponse.
    :return: A Response or CachedFeed containing up-to-date content.
    """
    # Everything needed to uniquely identify this page of this feed.
    keys = cls._prepare_keys(_db, worklist, facets, pagination)

    # Settle on a cache lifetime (a number of seconds or one of the
    # sentinel constants).
    max_age = cls.max_cache_age(worklist, keys.feed_type, facets, max_age)
    ignoring_cache = max_age is cls.IGNORE_CACHE

    # Lookup arguments, shared by get_one and (on a cache miss)
    # get_one_or_create.
    #
    # TODO: the 'constraint' entry might not be necessary anymore.
    # ISTR it was an attempt to avoid race conditions, and we do a
    # better job of that now.
    #
    # `!= None` is deliberate: it is the SQLAlchemy column operator,
    # not a Python identity test.
    lookup = {
        'on_multiple': 'interchangeable',
        'constraint': and_(cls.content != None, cls.timestamp != None),  # noqa: E711
        'type': keys.feed_type,
        'library': keys.library,
        'work': keys.work,
        'lane_id': keys.lane_id,
        'unique_key': keys.unique_key,
        'facets': keys.facets_key,
        'pagination': keys.pagination_key,
    }

    # There is no point looking for a CachedFeed that is about to be
    # replaced anyway.
    skip_lookup = ignoring_cache or (
        isinstance(max_age, int) and max_age <= 0
    )
    feed_obj = None if skip_lookup else get_one(_db, cls, **lookup)

    feed_data = None
    if cls._should_refresh(feed_obj, max_age):
        # Cache miss: there is no feed_obj, or the one we found is
        # stale. Build a new feed.
        feed_data = str(refresher_method())
        generation_time = utc_now()

        if not ignoring_cache:
            # Store the freshly generated feed. Generation can be
            # slow, so another thread may have noticed the same stale
            # feed and written its own replacement in the meantime.
            # Fetch the row again instead of trusting the object we
            # already hold; this avoids a database error.
            feed_obj, _ = get_one_or_create(_db, cls, **lookup)
            stored_at = feed_obj.timestamp
            if stored_at is None or stored_at < generation_time:
                # Nobody beat us to it, or they did but our feed is
                # the more recent one. Ours wins.
                feed_obj.content = feed_data
                feed_obj.timestamp = generation_time
    elif feed_obj:
        # Cache hit.
        feed_data = feed_obj.content

    if raw and feed_obj:
        return feed_obj

    # Build a response-type object, filling in defaults the caller
    # did not provide.
    if isinstance(max_age, int):
        response_kwargs.setdefault('max_age', max_age)

    if ignoring_cache:
        # We were told to bypass our own cache, so tell the client
        # not to keep this document in _its_ cache either.
        response_kwargs['max_age'] = 0

    library = keys.library
    if library and library.has_root_lanes:
        # This Library steers patrons to different lanes based on
        # patron type, so every CachedFeed must be private (though
        # still cacheable) on the client side. Otherwise a change of
        # client credentials might reuse a cached representation
        # that should have been discarded.
        #
        # TODO: it might be possible to make this decision in a
        # more fine-grained way, which would allow intermediaries
        # to cache these feeds.
        response_kwargs['private'] = True

    return OPDSFeedResponse(response=feed_data, **response_kwargs)
[docs] @classmethod
def feed_type(cls, worklist, facets):
"""Determine the 'type' of the feed.
This may be defined either by `worklist` or by `facets`, with
`facets` taking priority.
:return: A string that can go into cachedfeeds.type.
"""
type = CachedFeed.PAGE_TYPE
if worklist:
type = worklist.CACHED_FEED_TYPE or type
if facets:
type = facets.CACHED_FEED_TYPE or type
return type
[docs] @classmethod
def max_cache_age(cls, worklist, type, facets, override=None):
"""Determine the number of seconds that a cached feed
of a given type can remain fresh.
Order of precedence: `override`, `facets`, `worklist`.
:param worklist: A WorkList which may have an opinion on this
topic.
:param type: The type of feed being generated.
:param facets: A faceting object that may have an opinion on this
topic.
:param override: A specific value passed in by the caller. This
may either be a number of seconds or a timedelta.
:return: A number of seconds, or CACHE_FOREVER or IGNORE_CACHE
"""
value = override
if value is None and facets is not None:
value = facets.max_cache_age
if value is None and worklist is not None:
value = worklist.max_cache_age(type)
if value in (cls.CACHE_FOREVER, cls.IGNORE_CACHE):
# Special caching rules apply.
return value
if value is None:
# Assume the feed should not be cached at all.
value = 0
if isinstance(value, datetime.timedelta):
value = value.total_seconds()
return value
@classmethod
def _should_refresh(cls, feed_obj, max_age):
"""Should we try to get a new representation of this CachedFeed?
:param feed_obj: A CachedFeed. This may be None, which is why
this is a class method.
:param max_age: Either a number of seconds, or one of the constants
CACHE_FOREVER or IGNORE_CACHE.
"""
should_refresh = False
if feed_obj is None:
# If we didn't find a CachedFeed (maybe because we didn't
# bother looking), we must always refresh.
should_refresh = True
elif max_age == cls.IGNORE_CACHE:
# If we are ignoring the cache, we must always refresh.
should_refresh = True
elif max_age == cls.CACHE_FOREVER:
# If we found *anything*, and the cache time is CACHE_FOREVER,
# we will never refresh.
should_refresh = False
elif (feed_obj.timestamp
and feed_obj.timestamp + datetime.timedelta(seconds=max_age) <=
utc_now()
):
# Here it comes down to a date comparison: how old is the
# CachedFeed?
should_refresh = True
return should_refresh
# Return type of _prepare_keys: one named field per value that goes
# into identifying a CachedFeed.
CachedFeedKeys = namedtuple(
    'CachedFeedKeys',
    (
        'feed_type',
        'library',
        'work',
        'lane_id',
        'unique_key',
        'facets_key',
        'pagination_key',
    ),
)
@classmethod
def _prepare_keys(cls, _db, worklist, facets, pagination):
"""Prepare various unique keys that will go into the database
and be used to distinguish CachedFeeds from one another.
This is kept in a helper method for ease of testing.
:param worklist: A WorkList.
:param facets: A Facets object.
:param pagination: A Pagination object.
:return: A CachedFeedKeys object.
"""
if not worklist:
raise ValueError(
"Cannot prepare a CachedFeed without a WorkList."
)
feed_type = cls.feed_type(worklist, facets)
# The Library is the one associated with `worklist`.
library = worklist.get_library(_db)
# A feed may be associated with a specific Work,
# e.g. recommendations for readers of that Work.
work = getattr(worklist, 'work', None)
# Either lane_id or unique_key must be set, but not both.
from ..lane import Lane
if isinstance(worklist, Lane):
lane_id = worklist.id
unique_key = None
else:
lane_id = None
unique_key = worklist.unique_key
facets_key = ""
if facets is not None:
if isinstance(facets.query_string, bytes):
facets_key = facets.query_string.decode("utf-8")
else:
facets_key = facets.query_string
pagination_key = ""
if pagination is not None:
if isinstance(pagination.query_string, bytes):
pagination_key = pagination.query_string.decode("utf-8")
else:
pagination_key = pagination.query_string
return cls.CachedFeedKeys(
feed_type=feed_type, library=library, work=work, lane_id=lane_id,
unique_key=unique_key, facets_key=facets_key,
pagination_key=pagination_key
)
def update(self, _db, content):
    """Store new content, stamp it with the current time, and flush.

    :param _db: A database connection, passed to `flush`.
    :param content: The new content of the feed.
    """
    refreshed_at = utc_now()
    self.content = content
    self.timestamp = refreshed_at
    flush(_db)
def __repr__(self):
if self.content:
length = len(self.content)
else:
length = "No content"
return "<CachedFeed #%s %s %s %s %s %s %s >" % (
self.id, self.lane_id, self.type,
self.facets, self.pagination,
self.timestamp, length
)
# Composite index on cachedfeeds covering, in order: library_id,
# lane_id, type, facets and pagination.
# NOTE(review): presumably this supports the lookup done in
# CachedFeed.fetch, which also filters on work and unique_key --
# confirm the index still matches the query.
Index(
"ix_cachedfeeds_library_id_lane_id_type_facets_pagination",
CachedFeed.library_id, CachedFeed.lane_id, CachedFeed.type,
CachedFeed.facets, CachedFeed.pagination
)
class WillNotGenerateExpensiveFeed(Exception):
    """Raised when a feed is not in the cache and generating it
    would be too expensive.
    """
class CachedMARCFile(Base):
    """A record that a MARC file has been created and cached for a particular lane."""

    __tablename__ = 'cachedmarcfiles'

    id = Column(Integer, primary_key=True)

    # The library (required) and lane this MARC file belongs to. A
    # null lane means the file is for the top-level WorkList.
    library_id = Column(
        Integer,
        ForeignKey('libraries.id'),
        index=True,
        nullable=False,
    )
    lane_id = Column(
        Integer,
        ForeignKey('lanes.id'),
        index=True,
        nullable=True,
    )

    # The representation holds the URL the file was mirrored to.
    representation_id = Column(
        Integer,
        ForeignKey('representations.id'),
        nullable=False,
    )

    start_time = Column(
        DateTime(timezone=True),
        index=True,
        nullable=True,
    )
    end_time = Column(
        DateTime(timezone=True),
        index=True,
        nullable=True,
    )